Example #1
def create_deep_rnn(layer,
                    layer_class,
                    depth,
                    layer_mask=None,
                    residual=False,
                    skip_connections=False,
                    bidir=False,
                    dropout=None,
                    init_state_layers=None,
                    name=None,
                    **kwargs):
    """
    (Deep) RNN with possible skip/residual connections, bidirectional, dropout
    """
    if init_state_layers:
        assert (len(init_state_layers) == depth)

    layers = [layer]
    for i in range(depth):
        if skip_connections and i > 0:
            layer = concat([layers[0], layer], axis=2)

        if init_state_layers:
            hid_init = init_state_layers[i]
        else:
            hid_init = init.Constant(0.)

        new_layer = layer_class(layer,
                                hid_init=hid_init,
                                mask_input=layer_mask,
                                name=name,
                                **kwargs)

        if bidir:
            layer_bw = layer_class(layer,
                                   mask_input=layer_mask,
                                   backwards=True,
                                   name=name,
                                   **kwargs)
            new_layer = concat([new_layer, layer_bw], axis=2)

        if residual:
            layer = ElemwiseSumLayer([layer, new_layer])
        else:
            layer = new_layer

        if skip_connections and i == depth - 1:
            layer = concat([layer] + layers[1:], axis=2)

        if dropout:
            layer = DropoutLayer(layer, p=dropout)

        # We need to apply the mask, otherwise there are problems with multiple
        # layers
        if layer_mask and i < depth - 1:
            layer = apply_mask(layer, layer_mask)
        layers.append(layer)

    return layers[1:]
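A minimal usage sketch for create_deep_rnn above (not part of the original example); the input shape, depth and num_units are illustrative assumptions, and passing layer_mask would additionally require the repo's apply_mask helper, which is not shown here.

import lasagne.layers as L

# Stack three GRU layers with skip connections and dropout; **kwargs
# (here num_units) are forwarded to every GRULayer that gets created.
l_in = L.InputLayer((None, None, 128))      # (batch, time, features)
rnn_stack = create_deep_rnn(l_in, L.GRULayer, depth=3,
                            skip_connections=True,
                            dropout=0.1,
                            num_units=256)
l_top = rnn_stack[-1]                       # output of the deepest layer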
    def build_network(self, vocab_size, input_var, mask_var, W_init):

        l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)
        l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)
        l_embed = L.EmbeddingLayer(l_in,
                                   input_size=vocab_size,
                                   output_size=EMBED_DIM,
                                   W=W_init)

        l_fwd_1 = L.LSTMLayer(l_embed,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)
        l_bkd_1 = L.LSTMLayer(l_embed,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True,
                              backwards=True)

        l_all_1 = L.concat([l_fwd_1, l_bkd_1], axis=2)

        l_fwd_2 = L.LSTMLayer(l_all_1,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)
        l_bkd_2 = L.LSTMLayer(l_all_1,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True,
                              backwards=True)

        l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
        l_bkd_1_slice = L.SliceLayer(l_bkd_1, 0, 1)
        y_1 = L.ElemwiseSumLayer([l_fwd_1_slice, l_bkd_1_slice])

        l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)
        l_bkd_2_slice = L.SliceLayer(l_bkd_2, 0, 1)
        y_2 = L.ElemwiseSumLayer([l_fwd_2_slice, l_bkd_2_slice])

        y = L.concat([y_1, y_2], axis=1)
        g = L.DenseLayer(y,
                         num_units=EMBED_DIM,
                         nonlinearity=lasagne.nonlinearities.tanh)
        l_out = L.DenseLayer(g,
                             num_units=vocab_size,
                             W=l_embed.W.T,
                             nonlinearity=lasagne.nonlinearities.softmax)

        return l_out
Example #3
def inception(network, no_1x1=64, no_3x3r=96, no_3x3=128, no_5x5r=16, no_5x5=32, no_pool=32):
    out1 = layers.Conv2DLayer(network, num_filters=no_1x1, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out3 = layers.Conv2DLayer(network, num_filters=no_3x3r, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out3 = layers.Conv2DLayer(out3, num_filters=no_3x3, filter_size=(3, 3),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out5 = layers.Conv2DLayer(network, num_filters=no_5x5r, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out5 = layers.Conv2DLayer(out5, num_filters=no_5x5, filter_size=(5, 5),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    outpool = layers.MaxPool2DLayer(network, 3, stride=1, pad=1)
    outpool = layers.Conv2DLayer(outpool, num_filters=no_pool, filter_size=(1, 1),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')
    return layers.concat([out1, out3, out5, outpool])
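An illustrative way to chain the inception block above (not from the original source); the import aliases mirror the names used in the snippet and the input shape is an assumption.

import lasagne.layers as layers
import lasagne.nonlinearities as nonLinear
import lasagne.init as init

# A small stem followed by two inception blocks; every branch keeps the
# spatial size ('same' padding / stride-1 pooling), so the channel-wise
# concat inside inception() lines up.
net = layers.InputLayer((None, 3, 64, 64))
net = layers.Conv2DLayer(net, 64, (3, 3), pad='same')
net = inception(net)                  # default branch widths
net = inception(net, no_1x1=128)      # wider 1x1 branch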
    def get_context(self, conv_in, avg=False):
        suf = '_avg' if avg else ''

        conv_out = []
        # for n in [2,3,4,5,6,7,8,9]:
        # for n in [2,3,4,5]:
        for n in self.args.context_ngrams:
            conv = conv_in
            for i in range(self.args.conv_layers):
                conv = L.Conv1DLayer(
                    conv,
                    128,
                    n,
                    name='conv_window_%d(%d)%s' % (n, i, suf),
                    # W=HeNormal('relu') if not avg else Constant()) # (100, 128, 15-n+1)
                    W=GlorotNormal('relu')
                    if not avg else Constant())  # (100, 128, 15-n+1)

            conv = L.MaxPool1DLayer(
                conv, self.args.window_size -
                (n - 1) * self.args.conv_layers)  # (100, 128, 1)
            conv = L.flatten(conv, 2)  # (100, 128)
            conv_out.append(conv)

        x = L.concat(conv_out, axis=1)  # (100, 1024)

        return x
Example #5
    def __create_toplogy__(self, input_var_first=None, input_var_second=None):
        # define network topology
        if (self.conf.rep % 2 != 0):
            raise ValueError("Representation size should be divisible by two as it's formed by combining two crossmodal translations", self.conf.rep)

        # input layers
        l_in_first  = InputLayer(shape=(self.conf.batch_size, self.conf.mod1size), input_var=input_var_first)
        l_in_second = InputLayer(shape=(self.conf.batch_size, self.conf.mod2size), input_var=input_var_second)

        # first -> second
        l_hidden1_first   = DenseLayer(l_in_first, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=GlorotUniform())         # enc1
        l_hidden2_first   = DenseLayer(l_hidden1_first, num_units=self.conf.rep//2, nonlinearity=self.conf.act, W=GlorotUniform()) # enc2
        l_hidden2_first_d = DropoutLayer(l_hidden2_first, p=self.conf.dropout)
        l_hidden3_first   = DenseLayer(l_hidden2_first_d, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=GlorotUniform())    # dec1
        l_out_first       = DenseLayer(l_hidden3_first, num_units=self.conf.mod2size, nonlinearity=self.conf.act, W=GlorotUniform()) # dec2

        if self.conf.untied:
            # FREE
            l_hidden1_second   = DenseLayer(l_in_second, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=GlorotUniform())         # enc1
            l_hidden2_second   = DenseLayer(l_hidden1_second, num_units=self.conf.rep//2, nonlinearity=self.conf.act, W=GlorotUniform()) # enc2
            l_hidden2_second_d = DropoutLayer(l_hidden2_second, p=self.conf.dropout)
            l_hidden3_second   = DenseLayer(l_hidden2_second_d, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=GlorotUniform())    # dec1
            l_out_second       = DenseLayer(l_hidden3_second, num_units=self.conf.mod1size, nonlinearity=self.conf.act, W=GlorotUniform()) # dec2
        else:
            # TIED middle
            l_hidden1_second   = DenseLayer(l_in_second, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=GlorotUniform())             # enc1
            l_hidden2_second   = DenseLayer(l_hidden1_second, num_units=self.conf.rep//2, nonlinearity=self.conf.act, W=l_hidden3_first.W.T) # enc2
            l_hidden2_second_d = DropoutLayer(l_hidden2_second, p=self.conf.dropout)
            l_hidden3_second   = DenseLayer(l_hidden2_second_d, num_units=self.conf.hdn, nonlinearity=self.conf.act, W=l_hidden2_first.W.T) # dec1
            l_out_second       = DenseLayer(l_hidden3_second, num_units=self.conf.mod1size, nonlinearity=self.conf.act, W=GlorotUniform())  # dec2

        l_out = concat([l_out_first, l_out_second])

        return l_out, l_hidden2_first, l_hidden2_second
    def get_conv_input(self, sidx, tidx, avg=False):
        suf = '_avg' if avg else ''

        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        # TODO: change the meaning
        if self.args.lex == 'mix':
            concat_emb = L.ElemwiseSumLayer(feat_embs)  # (100, 15, 256)
        else:
            concat_emb = L.concat(feat_embs, axis=2)  # (100, 15, 256+100)

        pos = np.array([0] * (self.args.window_size / 2) + [1] + [0] *
                       (self.args.window_size / 2)).astype(
                           theano.config.floatX)
        post = theano.shared(pos[np.newaxis, :, np.newaxis],
                             borrow=True)  # (1, 15, 1)
        posl = L.InputLayer(
            (None, self.args.window_size, 1),
            input_var=T.extra_ops.repeat(post, sidx.shape[0],
                                         axis=0))  # (100, 15, 1)
        conv_in = L.concat([concat_emb, posl], axis=2)  # (100, 15, 256+1)

        if self.args.pos_emb:
            posint = L.flatten(
                L.ExpressionLayer(posl,
                                  lambda x: T.cast(x, 'int64')))  # (100, 15)
            pos_emb = L.EmbeddingLayer(
                posint,
                self.args.window_size,
                8,
                name='epos' + suf,
                W=Normal(0.01) if not avg else Constant())  # (100, 15, 8)
            pos_emb.params[pos_emb.W].remove('regularizable')
            conv_in = L.concat([concat_emb, posl, pos_emb],
                               axis=2)  # (100, 15, 256+1+8)

        # # squeeze
        # if self.args.squeeze:
        #     conv_in = L.DenseLayer(conv_in, num_units=self.args.squeeze, name='squeeze'+suf, num_leading_axes=2,
        #                     W=HeNormal('relu')) # (100, 15, 256)

        conv_in = L.dimshuffle(conv_in, (0, 2, 1))  # (100, 256+1, 15)

        return conv_in
Example #7
def conv4_net_dense_color(data, ndim, pad='same'):
    res = conv_nonl(data, 6, '1', pad=pad)
    res = conv_nonl(res, 12, '2', pad=pad)
    res = conv_nonl(res, 24, '3', pad=pad)
    res = L.concat([data, res], axis=1, name='concat')
    res = L.DimshuffleLayer(res, (0, 2, 3, 1), name='transpose')
    res = L2NormLayer(res, 1e-8, name='l2norm')
    res = NormedDense(res, ndim, name='normed_dense')
    return res
Example #8
def create_deep_rnn(layer, layer_class, depth, layer_mask=None, residual=False,
                    skip_connections=False, bidir=False, dropout=None,
                    init_state_layers=None, **kwargs):
    """
    (Deep) RNN with possible skip/residual connections, bidirectional, dropout
    """
    layers = [layer]
    for i in range(depth):
        if skip_connections and i > 0:
            layer = concat([layers[0], layer], axis=2)

        if init_state_layers:
            hid_init = init_state_layers[i]
        else:
            hid_init = init.Constant(0.)

        new_layer = layer_class(layer, hid_init=hid_init,
                                mask_input=layer_mask, **kwargs)

        if bidir:
            layer_bw = layer_class(layer, mask_input=layer_mask,
                                   backwards=True, **kwargs)
            new_layer = concat([new_layer, layer_bw], axis=2)

        if residual:
            layer = ElemwiseSumLayer([layer, new_layer])
        else:
            layer = new_layer

        if skip_connections and i == depth-1:
            layer = concat([layer] + layers[1:], axis=2)

        if dropout:
            layer = DropoutLayer(layer, p=dropout)

        layers.append(layer)

    return layers[1:]
Example #9
def buildNetwork(input_var=None):
    net = {}

    # The input shape is (freq, time) -> (129, 300)
    net['input'] = InputLayer((None, 129, 300), input_var=input_var)
    print "input: {}".format(net['input'].output_shape[1:])
    # conv1
    net['conv1'] = Conv1DLayer(net['input'], num_filters=256, filter_size=4, W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv1: {}".format(net['conv1'].output_shape[1:])
    # pool1
    net['pool1'] = Pool1DLayer(net['conv1'], pool_size=4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    # conv2
    net['conv2'] = Conv1DLayer(net['pool1'], num_filters=256, filter_size=4, W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv2: {}".format(net['conv2'].output_shape[1:])
    # pool2
    net['pool2'] = Pool1DLayer(net['conv2'], pool_size=2)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    # conv3
    net['conv3'] = Conv1DLayer(net['pool2'], num_filters=512, filter_size=4, W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv3: {}".format(net['conv3'].output_shape[1:])

    # global pool
    net['pool3_1'] = GlobalPoolLayer(net['conv3'], pool_function=T.mean)
    print "pool3_1: {}".format(net['pool3_1'].output_shape[1:])

    net['pool3_2'] = GlobalPoolLayer(net['conv3'], pool_function=T.max)
    print "pool3_2: {}".format(net['pool3_2'].output_shape[1:])

    net['pool3'] = concat((net['pool3_1'], net['pool3_2']), axis=1)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    # fc6
    net['fc6'] = DenseLayer(net['pool3'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify, W=GlorotUniform('relu'), b=Constant(0.0))
    print "fc6: {}".format(net['fc6'].output_shape[1:])

    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify, W=GlorotUniform('relu'), b=Constant(0.0))
    print "fc7: {}".format(net['fc7'].output_shape[1:])

    # output
    net['output'] = DenseLayer(net['fc7'], num_units=100,
                               nonlinearity=lasagne.nonlinearities.sigmoid, W=GlorotUniform('relu'), b=Constant(0.0))
    print "output: {}".format(net['output'].output_shape[1:])

    return net
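A hypothetical driver for buildNetwork above, assuming Python 2 (the snippet uses print statements) and that the snippet's own imports (InputLayer, Conv1DLayer, T, ...) are in scope.

import theano
import theano.tensor as T
import lasagne

x = T.tensor3('x')                              # (batch, 129, 300) spectrogram
net = buildNetwork(x)                           # prints the layer shapes
prediction = lasagne.layers.get_output(net['output'], deterministic=True)
predict_fn = theano.function([x], prediction)   # compile a forward pass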
def build_lstm_reader(vocab_size, input_var=T.itensor3(), mask_var=T.tensor3(), skip_connect=True):
    # the input layer
    l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)
    # the mask layer
    l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)
    # the lookup table of word embeddings
    l_embed = L.EmbeddingLayer(l_in, vocab_size, EMBED_DIM)

    # the 1st lstm layer
    l_fwd_1 = L.LSTMLayer(l_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP, mask_input=l_mask,
            gradient_steps=GRAD_STEPS, precompute_input=True)

    # the 2nd lstm layer
    if skip_connect:
        # construct skip connection from the lookup table to the 2nd layer
        batch_size, seq_len, _ = input_var.shape
        # concatenate the last dimension of l_fwd_1 and embed
        l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN))
        l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM))
        to_next_layer = L.ReshapeLayer(L.concat([l_fwd_1_shp, l_embed_shp], axis=1),
                (batch_size, seq_len, NUM_HIDDEN+EMBED_DIM)) 
    else:
        to_next_layer = l_fwd_1

    l_fwd_2 = L.LSTMLayer(to_next_layer, NUM_HIDDEN, grad_clipping=GRAD_CLIP, mask_input=l_mask,
            gradient_steps=GRAD_STEPS, precompute_input=True)

    # slice final states of both lstm layers
    l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
    l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)

    # g will be used to score the words based on their embeddings
    g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1), num_units=EMBED_DIM)
    # W is shared with the embedding layer
    l_out = L.DenseLayer(g, num_units=vocab_size, W=l_embed.W.T, nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
Example #11
def buildNetwork(input_var=None):
    net = {}

    net['input'] = InputLayer((None, 12, 300), input_var=input_var)
    print "input: {}".format(net['input'].output_shape[1:])
    # conv1
    net['conv1'] = Conv1DLayer(net['input'], num_filters=256, filter_size=4, nonlinearity=rectify)
    print "conv1: {}".format(net['conv1'].output_shape[1:])
    # pool1
    net['pool1'] = Pool1DLayer(net['conv1'], pool_size=4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    # conv2
    net['conv2'] = Conv1DLayer(net['pool1'], num_filters=256, filter_size=4, nonlinearity=rectify)
    print "conv2: {}".format(net['conv2'].output_shape[1:])
    # pool2
    net['pool2'] = Pool1DLayer(net['conv2'], pool_size=1)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    # conv3
    net['conv3'] = Conv1DLayer(net['pool2'], num_filters=512, filter_size=4)
    print "conv3: {}".format(net['conv3'].output_shape[1:])

    # global pool
    net['pool3_1'] = GlobalPoolLayer(net['conv3'], pool_function=T.mean)
    print "pool3_1: {}".format(net['pool3_1'].output_shape[1:])

    net['pool3_2'] = GlobalPoolLayer(net['conv3'], pool_function=T.max)
    print "pool3_2: {}".format(net['pool3_2'].output_shape[1:])

    net['pool3'] = concat((net['pool3_1'], net['pool3_2']), axis=1)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    # fc6
    net['fc6'] = DenseLayer(net['pool3'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify)
    print "fc6: {}".format(net['fc6'].output_shape[1:])
    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify)
    print "fc7: {}".format(net['fc7'].output_shape[1:])
    # output
    net['output'] = DenseLayer(net['fc7'], num_units=256,
                               nonlinearity=lasagne.nonlinearities.sigmoid)
    print "output: {}".format(net['output'].output_shape[1:])

    return net
def build_discriminator_lstm(params, gate_params, cell_params):
    from lasagne.layers import InputLayer, DenseLayer, concat
    from lasagne.layers.recurrent import LSTMLayer
    from lasagne.regularization import l2, regularize_layer_params
    # from layers import MinibatchLayer
    # input layers
    l_in = InputLayer(
        shape=params['input_shape'], name='d_in')
    l_mask = InputLayer(
        shape=params['mask_shape'], name='d_mask')

    # recurrent layers for bidirectional network
    l_forward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][0], only_return_final=True,
        mask_input=l_mask)
    l_backward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][1], only_return_final=True,
        mask_input=l_mask, backwards=True)

    # concatenate output of forward and backward layers
    l_concat = concat([l_forward, l_backward], axis=1)

    # minibatch layer on forward and backward layers
    # l_minibatch = MinibatchLayer(l_concat, num_kernels=100)

    # output layer
    l_out = DenseLayer(
        l_concat, num_units=params['n_output_units'],
        nonlinearity=params['non_linearities'][2])

    regularization = regularize_layer_params(
        l_out, l2) * params['regularization']

    class Discriminator:
        def __init__(self, l_in, l_mask, l_out):
            self.l_in = l_in
            self.l_mask = l_mask
            self.l_out = l_out
            self.regularization = regularization

    return Discriminator(l_in, l_mask, l_out)
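Illustrative parameter structures for build_discriminator_lstm above; only the required keys are taken from the function body, the concrete values and shapes are assumptions.

import lasagne
from lasagne.layers import Gate

params = {
    'input_shape': (None, None, 64),     # (batch, time, features)
    'mask_shape': (None, None),
    'n_units': 128,
    'grad_clip': 100.,
    'non_linearities': (lasagne.nonlinearities.tanh,      # forward LSTM
                        lasagne.nonlinearities.tanh,      # backward LSTM
                        lasagne.nonlinearities.sigmoid),  # output layer
    'n_output_units': 1,
    'regularization': 1e-4,
}
gate_params = Gate()                     # default gate initialization
cell_params = Gate(W_cell=None,
                   nonlinearity=lasagne.nonlinearities.tanh)

discriminator = build_discriminator_lstm(params, gate_params, cell_params)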
def build_model():
    net = {}
    net['input'] = InputLayer((None, 512 * 20, 3, 3))

    au_fc_layers = []
    for i in range(20):
        net['roi_AU_N_' + str(i)] = SliceLayer(net['input'],
                                               indices=slice(
                                                   i * 512, (i + 1) * 512),
                                               axis=1)

        # try adding upsampling here for more convolutions

        net['Roi_upsample_' + str(i)] = Upscale2DLayer(net['roi_AU_N_' +
                                                           str(i)],
                                                       scale_factor=2)

        net['conv_roi_' + str(i)] = ConvLayer(net['Roi_upsample_' + str(i)],
                                              512, 3)

        net['au_fc_' + str(i)] = DenseLayer(net['conv_roi_' + str(i)],
                                            num_units=150)

        au_fc_layers += [net['au_fc_' + str(i)]]

    #
    net['local_fc'] = concat(au_fc_layers)
    net['local_fc2'] = DenseLayer(net['local_fc'], num_units=2048)

    net['local_fc_dp'] = DropoutLayer(net['local_fc2'], p=0.5)

    # net['fc_comb']=concat([net['au_fc_layer'],net['local_fc_dp']])

    # net['fc_dense']=DenseLayer(net['fc_comb'],num_units=1024)

    # net['fc_dense_dp']=DropoutLayer(net['fc_dense'],p=0.3)

    net['real_out'] = DenseLayer(net['local_fc_dp'],
                                 num_units=12,
                                 nonlinearity=sigmoid)

    # net['final']=concat([net['pred_pos_layer'],net['output_layer']])

    return net
    def score_fused_convnets(self,
                             fusion_type,
                             input_var1=None,
                             input_var2=None,
                             weights_dir_depth=None,
                             weights_dir_rgb=None,
                             bottleneck_W=None,
                             weights_dir=None):

        net = OrderedDict()
        rgb_net = self.simple_convnet(4,
                                      input_var=input_var1,
                                      bottleneck_W=bottleneck_W)
        depth_net = self.simple_convnet(1,
                                        input_var=input_var2,
                                        bottleneck_W=bottleneck_W)
        if weights_dir_depth is not None and weights_dir_rgb is not None:
            lw_depth = LoadWeights(weights_dir_depth, depth_net)
            lw_depth.load_weights_numpy()
            lw_rgb = LoadWeights(weights_dir_rgb, rgb_net)
            lw_rgb.load_weights_numpy()
        if fusion_type == self.LOCAL:
            net['reshape_depth'] = reshape(depth_net['output'],
                                           ([0], 1, 1, [1]))
            net['reshape_rgb'] = reshape(rgb_net['output'], ([0], 1, 1, [1]))
            net['concat'] = concat([net['reshape_depth'], net['reshape_rgb']])
            net['lcl'] = LocallyConnected2DLayer(net['concat'],
                                                 1, (1, 1),
                                                 untie_biases=True,
                                                 nonlinearity=None)
            net['output'] = reshape(net['lcl'], ([0], [3]))
        elif fusion_type == self.SUM:
            net['output'] = ElemwiseSumLayer(
                [depth_net['output'], rgb_net['output']], coeffs=0.5)

        if weights_dir is not None:
            lw = LoadWeights(weights_dir, net)
            lw.load_weights_numpy()
        return net
Example #15
class decoder_step:
    #inputs
    encoder = L.InputLayer((None, None, CODE_SIZE), name='encoded sequence')
    encoder_mask = L.InputLayer((None, None), name='encoded sequence mask')

    inp = L.InputLayer((None, ), name='current character')

    l_target_emb = L.EmbeddingLayer(inp, dst_voc.len, 128)

    #recurrent part

    l_rnn1 = AutoLSTMCell(l_target_emb, 128, name="lstm1")

    query = L.DenseLayer(l_rnn1.out, 128, nonlinearity=None)
    attn = AttentionLayer(encoder, query, 128, mask_input=encoder_mask)['attn']

    l_rnn = L.concat([attn, l_rnn1.out, l_target_emb])

    l_rnn2 = AutoLSTMCell(l_rnn, 128, name="lstm1")

    next_token_probas = L.DenseLayer(l_rnn2.out,
                                     dst_voc.len,
                                     nonlinearity=T.nnet.softmax)

    #pick next token from predicted probas
    next_token = ProbabilisticResolver(next_token_probas)

    tau = T.scalar("sample temperature", "float32")

    next_token_temperatured = TemperatureResolver(next_token_probas, tau)
    next_token_greedy = GreedyResolver(next_token_probas)

    auto_updates = {
        **l_rnn1.get_automatic_updates(),
        **l_rnn2.get_automatic_updates()
    }
    def get_actor(self, sidx, tidx, valid, avg=False):
        suf = '_avg' if avg else ''
        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        x = L.concat(feat_embs, axis=2)  # (100, 26, 256+32+32+...)
        if self.args.squeeze:
            x = L.DenseLayer(x,
                             num_units=self.args.squeeze,
                             name='h0' + suf,
                             num_leading_axes=2,
                             W=HeNormal('relu'))  # (100, 26, 256)

        x = L.flatten(x)  # (100, 26*256)

        h1 = L.DenseLayer(x,
                          num_units=self.args.nh1,
                          name='h1' + suf,
                          W=HeNormal('relu'))  # (100, 512)

        h1 = L.dropout(h1, self.args.dropout)

        taggers = {}
        if self.args.aux_tagger:
            hids = [h1]
            for name in self.args.target_feats:
                hid = L.DenseLayer(h1,
                                   256,
                                   name='hid-%s%s' % (name, suf),
                                   W=HeNormal('relu'))  # (100, 512)
                hids.append(hid)
                hid = L.dropout(hid, self.args.dropout)
                # h1 = L.dropout(h1, self.args.dropout)
                taggers[name] = L.DenseLayer(hid,
                                             len(self.manager.feats[name].map),
                                             name='tagger-%s' % name,
                                             W=HeNormal(),
                                             nonlinearity=softmax)  # (100, 25)
            h1 = L.concat(hids, axis=1)

        h2 = L.DenseLayer(h1,
                          num_units=self.args.nh2,
                          name='h2' + suf,
                          W=HeNormal('relu'))  # (100, 256)

        h2 = L.dropout(h2, self.args.dropout)
        h3y = L.DenseLayer(h2,
                           num_units=self.args.nh3,
                           name='h3y' + suf,
                           W=HeNormal(),
                           nonlinearity=softmax)  # (100, 4) num of actions
        h3s = L.concat(
            [h2, h3y], axis=1
        )  # (100, 256+4+4), this way shouldn't output <UNK> if its not SHIFT
        h3z = L.DenseLayer(h2,
                           num_units=self.args.size['label'],
                           name='h3z' + suf,
                           W=HeNormal(),
                           nonlinearity=softmax)  # (100, 25) number of labels

        if avg:
            set_all_zero([h3y, h3z] + taggers.values())

        return h3y, h3z, taggers
    def input_fused_convnets(self,
                             fusion_type,
                             input_var1=None,
                             input_var2=None,
                             bottleneck_W=None):
        net = OrderedDict()
        net['input_rgb'] = InputLayer((None, 4, 128, 128),
                                      input_var=input_var1)
        layer = 0
        net['input_depth'] = InputLayer((None, 1, 128, 128),
                                        input_var=input_var2)
        layer += 1

        if fusion_type == self.CONCAT:
            net['merge'] = concat([net['input_rgb'], net['input_depth']])
            layer += 1
        elif fusion_type == self.CONCATCONV:
            net['concat'] = concat([net['input_rgb'], net['input_depth']])
            layer += 1
            net['merge'] = Conv2DLayer(net['concat'],
                                       num_filters=1,
                                       filter_size=(1, 1),
                                       nonlinearity=None)
            layer += 1

        for i in range(self._net_specs_dict['num_conv_layers']):
            # Add convolution layers
            net['conv{0:d}'.format(i + 1)] = Conv2DLayer(
                net.values()[layer],
                num_filters=self._net_specs_dict['num_conv_filters'][i],
                filter_size=(self._net_specs_dict['conv_filter_size'][i], ) *
                2,
                pad='same')
            layer += 1
            if self._net_specs_dict['num_conv_layers'] <= 2:
                # Add pooling layers
                net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                    net.values()[layer], pool_size=(3, 3))
                layer += 1
            else:
                if i < 4:
                    if (i + 1) % 2 == 0:
                        # Add pooling layers
                        net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1
                else:
                    if (i + 1) == 7:
                        # Add pooling layers
                        net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1

        # Add fc-layers
        net['fc1'] = DenseLayer(net.values()[layer],
                                self._net_specs_dict['num_fc_units'][0])
        # Add dropout layer
        net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p'])
        net['fc2'] = DenseLayer(net['dropout1'],
                                self._net_specs_dict['num_fc_units'][1])
        # Add dropout layer
        net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p'])
        if bottleneck_W is not None:
            # Add bottleneck layer
            net['bottleneck'] = DenseLayer(net['dropout2'], 30)
            # Add output layer(linear activation because it's regression)
            net['output'] = DenseLayer(
                net['bottleneck'],
                3 * self._num_joints,
                W=bottleneck_W[0:30],
                nonlinearity=lasagne.nonlinearities.tanh)
        else:
            # Add output layer(linear activation because it's regression)
            net['output'] = DenseLayer(
                net['dropout2'],
                3 * self._num_joints,
                nonlinearity=lasagne.nonlinearities.tanh)
        return net
    def build_network(self, vocab_size, doc_var, query_var, docmask_var,
                      qmask_var, W_init):

        l_docin = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=query_var)
        l_docmask = L.InputLayer(shape=(None, None), input_var=docmask_var)
        l_qmask = L.InputLayer(shape=(None, None), input_var=qmask_var)
        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=EMBED_DIM,
                                      W=W_init)
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=EMBED_DIM,
                                    W=l_docembed.W)

        l_fwd_doc = L.GRULayer(l_docembed,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_docembed,
                               NUM_HIDDEN,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        l_fwd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True)
        l_bkd_q = L.GRULayer(l_qembed,
                             NUM_HIDDEN,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True)

        l_fwd_q_slice = L.SliceLayer(l_fwd_q, -1, 1)
        l_bkd_q_slice = L.SliceLayer(l_bkd_q, 0, 1)
        l_q = L.ConcatLayer([l_fwd_q_slice, l_bkd_q_slice])

        d = L.get_output(l_doc)  # B x N x D
        q = L.get_output(l_q)  # B x D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(
            T.set_subtensor(
                T.alloc(-20., p.shape[0], p.shape[1])[docmask_var.nonzero()],
                p[docmask_var.nonzero()]))

        index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
        final = T.inc_subtensor(
            T.alloc(0., p.shape[0], vocab_size)[index,
                                                T.flatten(doc_var, outdim=2)],
            pm)
        #qv = T.flatten(query_var,outdim=2)
        #index2 = T.reshape(T.repeat(T.arange(qv.shape[0]),qv.shape[1]),qv.shape)
        #xx = index2[qmask_var.nonzero()]
        #yy = qv[qmask_var.nonzero()]
        #pV = T.set_subtensor(final[xx,yy], T.zeros_like(qv[xx,yy]))

        return final, l_doc, l_q
    def build_network(self, K, vocab_size, W_init):

        l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
        l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
        l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
        l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
        l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
        l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN),
                               input_var=self.inps[8])
        l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN),
                                 input_var=self.inps[9])
        l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])

        l_match_feat = L.InputLayer(shape=(None, None, None),
                                    input_var=self.inps[13])
        l_match_feat = L.EmbeddingLayer(l_match_feat, 2, 1)
        l_match_feat = L.ReshapeLayer(l_match_feat, (-1, [1], [2]))

        l_use_char = L.InputLayer(shape=(None, None, self.feat_cnt),
                                  input_var=self.inps[14])
        l_use_char_q = L.InputLayer(shape=(None, None, self.feat_cnt),
                                    input_var=self.inps[15])

        doc_shp = self.inps[1].shape
        qry_shp = self.inps[3].shape

        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=self.embed_dim,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=self.embed_dim,
                                    W=l_docembed.W)

        if self.train_emb == 0:
            l_docembed.params[l_docembed.W].remove('trainable')
            l_qembed.params[l_qembed.W].remove('trainable')

        l_qembed = L.ReshapeLayer(
            l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
        l_fembed = L.EmbeddingLayer(l_featin, input_size=2,
                                    output_size=2)  # B x N x 2

        # char embeddings
        if self.use_chars:
            # ====== concatenation ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 2*self.char_dim) # T x L x D
            # l_fgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP,
            #         mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True,
            #         only_return_final=True)
            # l_bgru = L.GRULayer(l_lookup, 2*self.char_dim, grad_clipping=GRAD_CLIP,
            #         mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True,
            #         backwards=True, only_return_final=True) # T x 2D
            # l_fwdembed = L.DenseLayer(l_fgru, self.embed_dim/2, nonlinearity=None) # T x DE/2
            # l_bckembed = L.DenseLayer(l_bgru, self.embed_dim/2, nonlinearity=None) # T x DE/2
            # l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
            # l_docchar_embed = IndexLayer([l_doctokin, l_embed]) # B x N x DE/2
            # l_qchar_embed = IndexLayer([l_qtokin, l_embed]) # B x Q x DE/2

            # l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
            # l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

            # ====== bidir feat concat ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
            # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = L.ConcatLayer([l_use_char, l_docchar_embed, l_doce], axis = 2)
            # l_qembed = L.ConcatLayer([l_use_char_q, l_qchar_embed, l_qembed], axis = 2)

            # ====== char concat ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = L.ConcatLayer([l_docchar_embed, l_doce], axis = 2)
            # l_qembed = L.ConcatLayer([l_qchar_embed, l_qembed], axis = 2)

            # ====== feat concat ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = L.ConcatLayer([l_use_char, l_docchar_embed, l_doce], axis = 2)
            # l_qembed = L.ConcatLayer([l_use_char_q, l_qchar_embed, l_qembed], axis = 2)

            # ====== gating ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
            # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

            # ====== tie gating ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
            # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed], W = l_doce.W, b = l_doce.b)

            # ====== scalar gating ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = ScalarDymLayer([l_use_char, l_docchar_embed, l_doce])
            # l_qembed = ScalarDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

            # ====== dibirectional gating ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
            # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
            # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

            # ====== gate + concat ======
            l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            l_char_gru = L.GRULayer(l_lookup,
                                    self.embed_dim,
                                    grad_clipping=GRAD_CLIP,
                                    mask_input=l_tokmask,
                                    gradient_steps=GRAD_STEPS,
                                    precompute_input=True,
                                    only_return_final=True)
            l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
            l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

            l_doce = L.ConcatLayer([l_use_char, l_doce], axis=2)
            l_qembed = L.ConcatLayer([l_use_char_q, l_qembed], axis=2)

            # ====== bidirectional gate + concat ======
            # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
            # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
            # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
            # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
            # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
            # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

            # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
            # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

            # l_doce = L.ConcatLayer([l_use_char, l_doce], axis = 2)
            # l_qembed = L.ConcatLayer([l_use_char_q, l_qembed], axis = 2)

        attentions = []
        if self.save_attn:
            l_m = PairwiseInteractionLayer([l_doce, l_qembed])
            attentions.append(L.get_output(l_m, deterministic=True))

        for i in range(K - 1):
            l_fwd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True,
                                     backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1],
                               axis=2)  # B x N x DE

            l_fwd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True,
                                   backwards=True)

            l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1],
                                    axis=2)  # B x Q x DE

            l_doce = MatrixAttentionLayer(
                [l_doc_1, l_q_c_1, l_qmask, l_match_feat])
            # l_doce = MatrixAttentionLayer([l_doc_1, l_q_c_1, l_qmask])

            # === begin GA ===
            # l_m = PairwiseInteractionLayer([l_doc_1, l_q_c_1])
            # l_doc_2_in = GatedAttentionLayer([l_doc_1, l_q_c_1, l_m], mask_input=self.inps[7])
            # l_doce = L.dropout(l_doc_2_in, p=self.dropout) # B x N x DE
            # === end GA ===

            # if self.save_attn:
            #     attentions.append(L.get_output(l_m, deterministic=True))

        if self.use_feat:
            l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2
        l_fwd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)
        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        l_fwd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             only_return_final=False)
        l_bkd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True,
                             only_return_final=False)
        l_q = L.ConcatLayer([l_fwd_q, l_bkd_q], axis=2)  # B x Q x 2D

        if self.save_attn:
            l_m = PairwiseInteractionLayer([l_doc, l_q])
            attentions.append(L.get_output(l_m, deterministic=True))

        l_prob = AttentionSumLayer([l_doc, l_q],
                                   self.inps[4],
                                   self.inps[12],
                                   mask_input=self.inps[10])
        final = L.get_output(l_prob)
        final_v = L.get_output(l_prob, deterministic=True)

        return final, final_v, l_prob, l_docembed.W, attentions
def construct_unet(channels=1, no_f_base=8, f_size=3, dropout=False, bs=None, class_nums=2, pad="same",nonlinearity=lasagne.nonlinearities.rectify, input_dim=[512,512]):
    net={}
    net["input"]= InputLayer(shape=(bs, channels, input_dim[0], input_dim[1]))

    # Moving downwards the U-shape. Simplified:
    net["conv_down11"] = Conv2DLayer(net["input"],no_f_base,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down12"] = Conv2DLayer(net["conv_down11"],no_f_base,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["pool1"]      = Pool2DLayer(net["conv_down12"],pool_size=2)

    net["conv_down21"] = Conv2DLayer(net["pool1"],no_f_base*2,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down22"] = Conv2DLayer(net["conv_down21"],no_f_base*2,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["pool2"]      = Pool2DLayer(net["conv_down22"],pool_size=2)

    net["conv_down31"] = Conv2DLayer(net["pool2"],no_f_base*4,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down32"] = Conv2DLayer(net["conv_down31"],no_f_base*4,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["pool3"]      = Pool2DLayer(net["conv_down32"],pool_size=2)

    net["conv_down41"] = Conv2DLayer(net["pool3"],no_f_base*8,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down42"] = Conv2DLayer(net["conv_down41"],no_f_base*8,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    to_drop1 = net["pool4"]      = Pool2DLayer(net["conv_down42"],pool_size=2)

    if dropout:
        to_drop1 = DropoutLayer(to_drop1, p=0.5)

    #vvvv bottom vvvv
    net["conv_bottom1"] = Conv2DLayer(to_drop1,no_f_base*16,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_bottom2"] = Conv2DLayer(net["conv_bottom1"],no_f_base*16,f_size,pad=pad,nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_bottom1"]      = Deconv2DLayer(net["conv_bottom2"], no_f_base*8, 2, 2)
    #^^^^ bottom ^^^^

    # Moving upwards the U-shape. Simplified:
    net["concat1"] = concat([net["deconv_bottom1"], net["conv_down42"]], cropping=(None, None, "center", "center"))
    net["conv_up11"]= Conv2DLayer(net["concat1"], no_f_base*8, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up11"]= Conv2DLayer(net["conv_up11"], no_f_base*8, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up1"] = Deconv2DLayer(net["conv_up11"], no_f_base*4, 2, 2)

    net["concat2"] = concat([net["deconv_up1"], net["conv_down32"]], cropping=(None, None, "center", "center"))
    net["conv_up21"]= Conv2DLayer(net["concat2"], no_f_base*4, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up22"]= Conv2DLayer(net["conv_up21"], no_f_base*4, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up2"] = Deconv2DLayer(net["conv_up22"], no_f_base*2, 2, 2)

    net["concat3"] = concat([net["deconv_up2"], net["conv_down22"]], cropping=(None, None, "center", "center"))
    net["conv_up31"]= Conv2DLayer(net["concat3"], no_f_base*2, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up32"]= Conv2DLayer(net["conv_up31"], no_f_base*2, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up3"] = Deconv2DLayer(net["conv_up32"], no_f_base, 2, 2)

    net["concat4"] = concat([net["deconv_up3"], net["conv_down12"]], cropping=(None, None, "center", "center"))
    net["conv_up41"]= Conv2DLayer(net["concat4"], no_f_base, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up42"]= Conv2DLayer(net["conv_up41"], no_f_base, f_size, pad=pad, nonlinearity=nonlinearity,W=lasagne.init.HeNormal(gain='relu'))
    # Class layer: work around the standard softmax, because it doesn't work on tensor4/tensor3.
    # Hence we reshape and feed the result to an external NonlinearityLayer.
    # net["out"] holds the output in image-shaped form.

    net["out"] = Conv2DLayer(net["conv_up42"], class_nums, 1, nonlinearity=None,W=lasagne.init.HeNormal(gain='relu'))
    net["layer_shuffle_dim"] = DimshuffleLayer(net["out"], (1, 0, 2, 3))
    net["reshape_layer"] = ReshapeLayer(net["layer_shuffle_dim"], (class_nums, -1))
    net["layer_shuffle_dim2"] = DimshuffleLayer(net["reshape_layer"], (1, 0))
    # Flattened output to be able to feed it to lasagne.objectives.categorical_crossentropy.
    net["out_optim"] = NonlinearityLayer(net["layer_shuffle_dim2"], nonlinearity=lasagne.nonlinearities.softmax)

    return net
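A sketch of how the flattened U-Net output could be plugged into a loss (not part of the original example); the per-pixel integer target layout is an assumption.

import theano.tensor as T
import lasagne

unet = construct_unet(channels=1, class_nums=2, input_dim=[512, 512])
targets = T.ivector('targets')      # one class id per pixel, flattened
prediction = lasagne.layers.get_output(unet["out_optim"])
loss = lasagne.objectives.categorical_crossentropy(prediction, targets).mean()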
def define_net(input_var):
    net = {}
    net['data'] = InputLayer(shape=(None, 3, IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
                             input_var=input_var)

    net['patch'] = sample_layer.Sample2DLayer(net['data'],
                                              5, (227, 227),
                                              pad=False)

    # conv1
    net['conv1'] = Conv2DLayer(net['patch'],
                               num_filters=96,
                               filter_size=(11, 11),
                               stride=4,
                               nonlinearity=lasagne.nonlinearities.rectify)

    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)

    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'],
                                                     n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75,
                                                     k=1)

    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'],
                                    indices=slice(48, 96),
                                    axis=1)

    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)

    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)

    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)

    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'],
                                                     n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75,
                                                     k=1)

    # conv3
    # no group
    net['conv3'] = Conv2DLayer(net['norm2'],
                               num_filters=384,
                               filter_size=(3, 3),
                               pad=1)

    # conv4
    # group = 2
    net['conv4_data1'] = SliceLayer(net['conv3'],
                                    indices=slice(0, 192),
                                    axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'],
                                    indices=slice(192, 384),
                                    axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)

    # conv5
    # group 2
    net['conv5_data1'] = SliceLayer(net['conv4'],
                                    indices=slice(0, 192),
                                    axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'],
                                    indices=slice(192, 384),
                                    axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)

    # pool 5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)

    # fc6
    net['fc6'] = DenseLayer(net['pool5'],
                            num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)

    # fc7
    net['fc7'] = DenseLayer(net['fc6'],
                            num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)

    # fc8
    net['out'] = DenseLayer(net['fc7'],
                            num_units=1,
                            nonlinearity=lasagne.nonlinearities.linear)

    # print ('Objective layer shapes:')
    # print (lasagne.layers.get_output_shape(net['pool5']))
    # # fc6
    # net['fc6'] = Conv2DLayer(
    #     net['pool5'], num_filters=4096, filter_size=(6, 6),
    #     nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False)
    # print (lasagne.layers.get_output_shape(net['fc6']))
    # # fc7
    # net['fc7'] = Conv2DLayer(
    #     net['fc6'],
    #     num_filters=4096, filter_size=(1, 1),
    #     nonlinearity=lasagne.nonlinearities.rectify)
    # print (lasagne.layers.get_output_shape(net['fc7']))
    # # fc8
    # net['out'] = Conv2DLayer(
    #     net['fc7'],
    #     num_filters=1, filter_size=(1, 1),
    #     nonlinearity=lasagne.nonlinearities.linear)
    # print (lasagne.layers.get_output_shape(net['out']))
    return net
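
define_net ends in a single linear unit, so inference is just a forward pass; a minimal sketch (the variable names are illustrative, and deterministic=True is used defensively) of compiling a scoring function from its 'out' layer:

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')          # (batch, 3, H, W) images
net = define_net(input_var)
score = lasagne.layers.get_output(net['out'], deterministic=True)
predict_fn = theano.function([input_var], score)
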
Beispiel #22
0
    def get_params_internal(
            self, **tags):  # this gives ALL the vars (not the params values)
        return L.get_all_params(  # this lasagne function also returns all vars below the passed layers
            L.concat(self._output_layers), **tags)
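
The comment above notes that get_all_params applied to a concat of the output layers collects every variable below them; a self-contained toy check of that behaviour:

from lasagne.layers import InputLayer, DenseLayer, concat, get_all_params

l_in = InputLayer((None, 10))
branch_a = DenseLayer(l_in, num_units=5)
branch_b = DenseLayer(l_in, num_units=7)
# Concatenating the two heads and walking the graph yields the union of their parameters.
params = get_all_params(concat([branch_a, branch_b]), trainable=True)
# params == [branch_a.W, branch_a.b, branch_b.W, branch_b.b]
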
def build_generator(input_var=None, batch_size=None, n_timesteps=128, alphabet_size=128):
    from lasagne.layers import InputLayer, DenseLayer, LSTMLayer
    from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    from lasagne.layers import ExpressionLayer, NonlinearityLayer
    from lasagne.layers import ReshapeLayer, DimshuffleLayer, Upscale2DLayer, concat
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm

    from lasagne.nonlinearities import sigmoid, tanh, softmax
    """
    layer = InputLayer(shape=(batch_size, 100), input_var=input_var)
    print("MNIST generator")
    layer = batch_norm(DenseLayer(layer, 1024))
    layer = batch_norm(DenseLayer(layer, 1024*8*8))
    layer = ReshapeLayer(layer, ([0], 1024, 8, 8))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=16))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=32))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=64))
    layer = batch_norm(Deconv2DLayer(
        layer, 1, 5, stride=2, crop='same', output_size=128,
        nonlinearity=tanh))

    # Crepe
    print("Crepe generator")
    layer = batch_norm(DenseLayer(layer, 1024))
    layer = batch_norm(DenseLayer(layer, 1024*13))
    layer = ReshapeLayer(layer, ([0], 1024, 1, 13))
    layer = batch_norm(Deconv2DLayer(
        layer, 512, (1, 4), stride=2, crop=0))
    layer = batch_norm(Deconv2DLayer(
        layer, 1024, (1, 5), stride=2, crop=0))
    layer = batch_norm(Deconv2DLayer(
        layer, 2048, (1, 5), stride=2, crop=0))
    layer = Deconv2DLayer(
        layer, 1, (128, 8), stride=1, crop=0, nonlinearity=tanh)
    """
    # LSTM
    # input layers
    layer = InputLayer(shape=(batch_size, n_timesteps, 100), input_var=input_var)
    # recurrent layers for bidirectional network
    l_forward_noise = LSTMLayer(
        layer, 64, learn_init=True, grad_clipping=None, only_return_final=False)
    l_backward_noise = LSTMLayer(
        layer, 64, learn_init=True, grad_clipping=None, only_return_final=False,
        backwards=True)
    layer = concat(
        [l_forward_noise, l_backward_noise], axis=2)
    layer = DenseLayer(layer, 1024, num_leading_axes=2)
    layer = DenseLayer(layer, alphabet_size, num_leading_axes=2)
    layer = ReshapeLayer(layer, (batch_size*n_timesteps, -1))
    layer = NonlinearityLayer(layer, softmax)
    layer = ReshapeLayer(layer, (batch_size, n_timesteps, -1))
    layer = DimshuffleLayer(layer, (0, 'x', 2, 1))
    layer = ExpressionLayer(layer, lambda X: X*2 - 1)

    print("Generator output:", layer.output_shape)
    return layer
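
The generator above applies softmax per timestep by folding the batch and time axes together before the NonlinearityLayer; the same trick in a standalone form with assumed toy sizes:

from lasagne.layers import InputLayer, DenseLayer, ReshapeLayer, NonlinearityLayer
from lasagne.nonlinearities import softmax

batch_size, n_timesteps, alphabet_size = 2, 4, 10
l_in = InputLayer((batch_size, n_timesteps, 32))
l_dense = DenseLayer(l_in, alphabet_size, num_leading_axes=2)   # (B, T, A)
l_flat = ReshapeLayer(l_dense, (batch_size * n_timesteps, -1))  # (B*T, A)
l_soft = NonlinearityLayer(l_flat, softmax)                     # each row sums to 1
l_out = ReshapeLayer(l_soft, (batch_size, n_timesteps, -1))     # back to (B, T, A)
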
Beispiel #24
0
    # encoder
    l_encoder1 = layers.DenseLayer(l_in, num_units=num_hidden_units)
    l_encoder2 = layers.DenseLayer(l_encoder1, num_units=num_hidden_units)
    l_encoder3 = layers.DenseLayer(l_encoder2, num_units=num_hidden_units)
    l_encoder4 = layers.DenseLayer(l_encoder3, num_units=num_hidden_units)

    # learned representation
    l_observed = layers.DenseLayer(l_encoder4, num_units=output_dim,
                                      nonlinearity=T.nnet.softmax)

    l_latent = layers.DenseLayer(l_encoder4,
                                 num_units=latent_size,
                                 nonlinearity=None) # linear

    l_representation = layers.concat([l_observed, l_latent])

    # decoder
    l_decoder1 = layers.DenseLayer(l_representation, num_units=num_hidden_units)
    l_decoder2 = layers.DenseLayer(l_decoder1, num_units=num_hidden_units)
    l_decoder3 = layers.DenseLayer(l_decoder2, num_units=num_hidden_units)
    l_decoder4 = layers.DenseLayer(l_decoder3, num_units=num_hidden_units)
    l_decoder_out = layers.DenseLayer(l_decoder4, num_units=input_dim,
                                       nonlinearity=nonlinearities.sigmoid)

    x_to_z = LightweightModel([l_in], [l_latent])
    x_to_y = LightweightModel([l_in], [l_observed])
    z_to_x = LightweightModel([l_observed, l_latent], [l_decoder_out])
    model = Model()
    model.x_to_z = x_to_z
    model.x_to_y = x_to_y
Beispiel #25
0
def build_model(vocab_size,
                doc_var,
                qry_var,
                doc_mask_var,
                qry_mask_var,
                W_init=lasagne.init.Normal()):

    l_doc_in = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
    l_qry_in = L.InputLayer(shape=(None, None, 1), input_var=qry_var)

    l_doc_embed = L.EmbeddingLayer(l_doc_in, vocab_size, EMBED_DIM, W=W_init)
    l_qry_embed = L.EmbeddingLayer(l_qry_in,
                                   vocab_size,
                                   EMBED_DIM,
                                   W=l_doc_embed.W)

    l_doc_mask = L.InputLayer(shape=(None, None), input_var=doc_mask_var)
    l_qry_mask = L.InputLayer(shape=(None, None), input_var=qry_mask_var)

    l_doc_fwd = L.LSTMLayer(l_doc_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_doc_bkd = L.LSTMLayer(l_doc_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True,
                            backwards=True)
    l_qry_fwd = L.LSTMLayer(l_qry_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_qry_bkd = L.LSTMLayer(l_qry_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True,
                            backwards=True)

    l_doc_fwd_slice = L.SliceLayer(l_doc_fwd, -1, 1)
    l_doc_bkd_slice = L.SliceLayer(l_doc_bkd, 0, 1)
    l_qry_fwd_slice = L.SliceLayer(l_qry_fwd, -1, 1)
    l_qry_bkd_slice = L.SliceLayer(l_qry_bkd, 0, 1)

    r = L.DenseLayer(L.ElemwiseSumLayer([l_doc_fwd_slice, l_doc_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)
    u = L.DenseLayer(L.ElemwiseSumLayer([l_qry_fwd_slice, l_qry_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)

    g = L.DenseLayer(L.concat([r, u], axis=1),
                     num_units=EMBED_DIM,
                     W=lasagne.init.GlorotNormal(),
                     nonlinearity=lasagne.nonlinearities.tanh)

    l_out = L.DenseLayer(g,
                         num_units=vocab_size,
                         W=l_doc_embed.W.T,
                         nonlinearity=lasagne.nonlinearities.softmax,
                         b=None)

    return l_out
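
The output layer above reuses the transposed embedding matrix (W=l_doc_embed.W.T), so vocabulary scores and word embeddings share parameters; a self-contained toy version of that tying (the sizes are illustrative):

import lasagne
import lasagne.layers as L

VOCAB, EMBED = 50, 8
l_idx = L.InputLayer((None, 5, 1))
l_emb = L.EmbeddingLayer(l_idx, input_size=VOCAB, output_size=EMBED)
l_feat = L.DenseLayer(L.flatten(l_emb), EMBED,
                      nonlinearity=lasagne.nonlinearities.tanh)
# Tie the classifier weights to the embedding table via its transpose.
l_scores = L.DenseLayer(l_feat, VOCAB, W=l_emb.W.T, b=None,
                        nonlinearity=lasagne.nonlinearities.softmax)
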
Beispiel #26
0
    def get_actor(self, avg=False):
        suf = '_avg' if avg else ''
        iw = L.InputLayer(shape=(None, self.args.sw))  # (100, 24)
        ew = L.EmbeddingLayer(
            iw,
            self.args.vw,
            self.args.nw,
            name='ew' + suf,
            W=HeNormal() if not avg else Constant())  # (100, 24, 256)
        ew.params[ew.W].remove('regularizable')
        if 'w' in self.args.freeze:
            ew.params[ew.W].remove('trainable')
        # for access from outside
        if not avg:
            self.Ew = ew.W

        # char embedding with CNN/LSTM
        ic = L.InputLayer(shape=(None, self.args.sw,
                                 self.args.max_len))  # (100, 24, 32)
        ec = self.get_char2word(ic, avg)  # (100, 24, 256)

        it = L.InputLayer(shape=(None, self.args.st))
        et = L.EmbeddingLayer(it,
                              self.args.vt,
                              self.args.nt,
                              name='et' + suf,
                              W=HeNormal() if not avg else Constant())
        et.params[et.W].remove('regularizable')

        il = L.InputLayer(shape=(None, self.args.sl))
        el = L.EmbeddingLayer(il,
                              self.args.vl,
                              self.args.nl,
                              name='el' + suf,
                              W=HeNormal() if not avg else Constant())
        el.params[el.W].remove('regularizable')

        to_concat = []
        if self.args.type == 'word':
            to_concat.append(ew)
        elif self.args.type == 'char':
            to_concat.append(ec)
        elif self.args.type == 'both':
            to_concat += [ew, ec]
        elif self.args.type == 'mix':
            to_concat.append(L.ElemwiseSumLayer([ew, ec]))

        if not self.args.untagged:
            to_concat.append(et)
        if not self.args.unlabeled:
            to_concat.append(el)

        x = L.concat(to_concat, axis=2)  # (100, 24, 64+16+16)

        # additional:
        # get a more compact representation of each token from its word, tag and label
        # before feeding it into the hidden layer
        if self.args.squeeze:
            x = L.DenseLayer(
                x,
                num_units=self.args.squeeze,
                name='h0' + suf,
                num_leading_axes=2,
                W=HeNormal('relu') if not avg else Constant())  # (100, 24, 64)

        h1 = L.DenseLayer(
            x,
            num_units=self.args.nh1,
            name='h1' + suf,
            W=HeNormal('relu') if not avg else Constant())  # (100, 512)
        h1 = L.dropout(h1, self.args.p1)
        h2 = L.DenseLayer(
            h1,
            num_units=self.args.nh2,
            name='h2' + suf,
            W=HeNormal('relu') if not avg else Constant())  # (100, 256)
        h2 = L.dropout(h2, self.args.p2)
        h3 = L.DenseLayer(h2,
                          num_units=self.args.nh3,
                          name='h3' + suf,
                          W=HeNormal() if not avg else Constant(),
                          nonlinearity=softmax)  # (100, 125) num of actions

        return iw, ic, it, il, h3
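
get_actor strips the 'regularizable' (and sometimes 'trainable') tag from the embedding weights; a self-contained sketch of what that tag mechanism does:

from lasagne.layers import InputLayer, EmbeddingLayer, get_all_params

l_in = InputLayer((None, 10))
emb = EmbeddingLayer(l_in, input_size=100, output_size=8)
emb.params[emb.W].remove('regularizable')       # exclude W from weight-decay terms
print(get_all_params(emb, regularizable=True))  # -> [] : W is no longer reported
print(get_all_params(emb, trainable=True))      # -> [W] : it still receives updates
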
Beispiel #27
0
def test_memory(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] so that convolutional layers work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states

    memory_dict = OrderedDict([])

    ###Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    memory_dict[window] = prev_window

    ###Stack
    #prev stack
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="prev_stack")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    ###RNN preset

    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ###GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ###GRUmemorylayer
    prev_gru1 = InputLayer((None, 15), name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    #LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")

    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")

    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    #LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")

    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")

    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    ##concat everything

    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and ads them into the pool
        throwing the old ones away entirely for simplicity"""

        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, leaving the rewards unscaled works well enough

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
    def get_params_internal(self, **tags):  # this gives ALL the vars (not the params values)
        return L.get_all_params(  # this lasagne function also returns all vars below the passed layers
            L.concat(self._output_layers),
            **tags
        )
    def dense_fused_convnets(self,
                             fusion_level,
                             fusion_type,
                             input_var1=None,
                             input_var2=None,
                             bottleneck_W=None,
                             weights_dir=None):

        net = OrderedDict()
        net['input_rgb'] = InputLayer((None, 4, 128, 128),
                                      input_var=input_var1)
        layer = 0
        for i in range(self._net_specs_dict['num_conv_layers']):
            # Add convolution layers
            net['conv_rgb{0:d}'.format(i + 1)] = Conv2DLayer(
                net.values()[layer],
                num_filters=self._net_specs_dict['num_conv_filters'][i],
                filter_size=(self._net_specs_dict['conv_filter_size'][i], ) *
                2,
                pad='same')
            layer += 1
            if self._net_specs_dict['num_conv_layers'] <= 2:
                # Add pooling layers
                net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                    net.values()[layer], pool_size=(3, 3))
                layer += 1
            else:
                if i < 4:
                    if (i + 1) % 2 == 0:
                        # Add pooling layers
                        net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1
                else:
                    if (i + 1) == 7:
                        # Add pooling layers
                        net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1
        # Fc-layers
        net['fc1_rgb'] = DenseLayer(net.values()[layer],
                                    self._net_specs_dict['num_fc_units'][0])
        layer += 1
        if fusion_level == 2:
            # Add dropout layer
            net['dropout1_rgb'] = dropout(net['fc1_rgb'],
                                          p=self._model_hp_dict['p'])
            layer += 1
            net['fc2_rgb'] = DenseLayer(
                net['dropout1_rgb'], self._net_specs_dict['num_fc_units'][1])
            layer += 1

        net['input_depth'] = InputLayer((None, 1, 128, 128),
                                        input_var=input_var2)
        layer += 1
        for i in range(self._net_specs_dict['num_conv_layers']):
            # Add convolution layers
            net['conv_depth{0:d}'.format(i + 1)] = Conv2DLayer(
                net.values()[layer],
                num_filters=self._net_specs_dict['num_conv_filters'][i],
                filter_size=(self._net_specs_dict['conv_filter_size'][i], ) *
                2,
                pad='same')
            layer += 1
            if self._net_specs_dict['num_conv_layers'] <= 2:
                # Add pooling layers
                net['pool_depth{0:d}'.format(i + 1)] = MaxPool2DLayer(
                    net.values()[layer], pool_size=(3, 3))
                layer += 1
            else:
                if i < 4:
                    if (i + 1) % 2 == 0:
                        # Add pooling layers
                        net['pool_depth{0:d}'.format(i+1)] =\
                            MaxPool2DLayer(net.values()[layer],
                                           pool_size=(3, 3))
                        layer += 1
                else:
                    if (i + 1) == 7:
                        # Add pooling layers
                        net['pool_depth{0:d}'.format(i+1)] =\
                            MaxPool2DLayer(net.values()[layer],
                                           pool_size=(3, 3))
                        layer += 1
        # Fc-layers
        net['fc1_depth'] = DenseLayer(net.values()[layer],
                                      self._net_specs_dict['num_fc_units'][0])
        layer += 1
        if fusion_level == 2:
            # Add dropout layer
            net['dropout1_depth'] = dropout(net['fc1_depth'],
                                            p=self._model_hp_dict['p'])
            layer += 1
            net['fc2_depth'] = DenseLayer(
                net['dropout1_depth'], self._net_specs_dict['num_fc_units'][1])
            layer += 1

        # Fuse ConvNets by fusion_level and fusion_type
        if fusion_type == self.MAX:
            net['merge'] =\
                ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level],
                                    net['fc%i_depth' % fusion_level]],
                                   T.maximum)
            layer += 1
        elif fusion_type == self.SUM:
            net['merge'] =\
                ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level],
                                    net['fc%i_depth' % fusion_level]],
                                   T.add)
            layer += 1
        elif fusion_type == self.CONCAT:
            net['merge'] = concat([
                net['fc%i_rgb' % fusion_level],
                net['fc%i_depth' % fusion_level]
            ])
            layer += 1
        elif fusion_type == self.CONCATCONV:
            net['fc%i_rgb_res' % fusion_level] =\
                reshape(net['fc%i_rgb' % fusion_level], ([0], 1, [1]))
            layer += 1
            net['fc%i_depth_res' % fusion_level] =\
                reshape(net['fc%i_depth' % fusion_level], ([0], 1, [1]))
            layer += 1
            net['concat'] = concat([
                net['fc%i_rgb_res' % fusion_level],
                net['fc%i_depth_res' % fusion_level]
            ])
            layer += 1
            net['merge_con'] = Conv1DLayer(net['concat'],
                                           num_filters=1,
                                           filter_size=(1, ),
                                           nonlinearity=None)
            layer += 1
            net['merge'] = reshape(net['merge_con'], ([0], [2]))
            layer += 1

        if fusion_level == 1:
            # Add dropout layer
            net['dropout1'] = dropout(net['merge'], p=self._model_hp_dict['p'])
            layer += 1
            net['fc2'] = DenseLayer(net['dropout1'],
                                    self._net_specs_dict['num_fc_units'][1])
            layer += 1
            # Add dropout layer
            net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p'])
            layer += 1
        else:
            # Add dropout layer
            net['dropout2'] = dropout(net['merge'], p=self._model_hp_dict['p'])
            layer += 1
        # Add output layer (regression output, squashed by a tanh nonlinearity)
        if bottleneck_W is not None:
            # Add bottleneck layer
            net['bottleneck'] = DenseLayer(net['dropout2'], 30)
            # Add output layer (regression output, squashed by a tanh nonlinearity)
            net['output'] = DenseLayer(
                net['bottleneck'],
                3 * self._num_joints,
                W=bottleneck_W[0:30],
                nonlinearity=lasagne.nonlinearities.tanh)
        else:
            # Add output layer (regression output, squashed by a tanh nonlinearity)
            net['output'] = DenseLayer(
                net['dropout2'],
                3 * self._num_joints,
                nonlinearity=lasagne.nonlinearities.tanh)
        if weights_dir is not None:
            lw = LoadWeights(weights_dir, net)
            lw.load_weights_numpy()
        return net
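
dense_fused_convnets picks one of four fusion modes; the first three reduce to standard Lasagne merge layers, shown here on two toy branches (shapes and names are assumptions for illustration):

import theano.tensor as T
from lasagne.layers import InputLayer, ElemwiseMergeLayer, concat

rgb = InputLayer((None, 16))
depth = InputLayer((None, 16))

fused_max = ElemwiseMergeLayer([rgb, depth], T.maximum)  # MAX:    (None, 16)
fused_sum = ElemwiseMergeLayer([rgb, depth], T.add)      # SUM:    (None, 16)
fused_cat = concat([rgb, depth])                         # CONCAT: (None, 32)
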
    def build_network(self,
                      vocab_size,
                      input_var,
                      mask_var,
                      docidx_var,
                      docidx_mask,
                      skip_connect=True):

        l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)

        l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)

        l_embed = L.EmbeddingLayer(l_in,
                                   input_size=vocab_size,
                                   output_size=EMBED_DIM,
                                   W=self.params['W_emb'])

        l_embed_noise = L.dropout(l_embed, p=DROPOUT_RATE)

        # NOTE: Moved initialization of forget gate biases to init_params
        #forget_gate_1 = L.Gate(b=lasagne.init.Constant(3))
        #forget_gate_2 = L.Gate(b=lasagne.init.Constant(3))

        # NOTE: the LSTM layer provided by Lasagne differs slightly from the one used
        # in DeepMind's paper: there the cell-to-* (peephole) weights are not diagonal.
        # the 1st lstm layer
        in_gate = L.Gate(W_in=self.params['W_lstm1_xi'],
                         W_hid=self.params['W_lstm1_hi'],
                         W_cell=self.params['W_lstm1_ci'],
                         b=self.params['b_lstm1_i'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
        forget_gate = L.Gate(W_in=self.params['W_lstm1_xf'],
                             W_hid=self.params['W_lstm1_hf'],
                             W_cell=self.params['W_lstm1_cf'],
                             b=self.params['b_lstm1_f'],
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        out_gate = L.Gate(W_in=self.params['W_lstm1_xo'],
                          W_hid=self.params['W_lstm1_ho'],
                          W_cell=self.params['W_lstm1_co'],
                          b=self.params['b_lstm1_o'],
                          nonlinearity=lasagne.nonlinearities.sigmoid)
        cell_gate = L.Gate(W_in=self.params['W_lstm1_xc'],
                           W_hid=self.params['W_lstm1_hc'],
                           W_cell=None,
                           b=self.params['b_lstm1_c'],
                           nonlinearity=lasagne.nonlinearities.tanh)
        l_fwd_1 = L.LSTMLayer(l_embed_noise,
                              NUM_HIDDEN,
                              ingate=in_gate,
                              forgetgate=forget_gate,
                              cell=cell_gate,
                              outgate=out_gate,
                              peepholes=True,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)

        # the 2nd lstm layer
        if skip_connect:
            # construct skip connection from the lookup table to the 2nd layer
            batch_size, seq_len, _ = input_var.shape
            # concatenate the last dimension of l_fwd_1 and embed
            l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN))
            l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM))
            to_next_layer = L.ReshapeLayer(
                L.concat([l_fwd_1_shp, l_embed_shp], axis=1),
                (batch_size, seq_len, NUM_HIDDEN + EMBED_DIM))
        else:
            to_next_layer = l_fwd_1

        to_next_layer_noise = L.dropout(to_next_layer, p=DROPOUT_RATE)

        in_gate = L.Gate(W_in=self.params['W_lstm2_xi'],
                         W_hid=self.params['W_lstm2_hi'],
                         W_cell=self.params['W_lstm2_ci'],
                         b=self.params['b_lstm2_i'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
        forget_gate = L.Gate(W_in=self.params['W_lstm2_xf'],
                             W_hid=self.params['W_lstm2_hf'],
                             W_cell=self.params['W_lstm2_cf'],
                             b=self.params['b_lstm2_f'],
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        out_gate = L.Gate(W_in=self.params['W_lstm2_xo'],
                          W_hid=self.params['W_lstm2_ho'],
                          W_cell=self.params['W_lstm2_co'],
                          b=self.params['b_lstm2_o'],
                          nonlinearity=lasagne.nonlinearities.sigmoid)
        cell_gate = L.Gate(W_in=self.params['W_lstm2_xc'],
                           W_hid=self.params['W_lstm2_hc'],
                           W_cell=None,
                           b=self.params['b_lstm2_c'],
                           nonlinearity=lasagne.nonlinearities.tanh)
        l_fwd_2 = L.LSTMLayer(to_next_layer_noise,
                              NUM_HIDDEN,
                              ingate=in_gate,
                              forgetgate=forget_gate,
                              cell=cell_gate,
                              outgate=out_gate,
                              peepholes=True,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)

        # slice final states of both lstm layers
        l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
        l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)

        # g will be used to score the words based on their embeddings
        g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1),
                         num_units=EMBED_DIM,
                         W=self.params['W_dense'],
                         b=self.params['b_dense'],
                         nonlinearity=lasagne.nonlinearities.tanh)

        ## get outputs
        #g_out = L.get_output(g) # B x D
        #g_out_val = L.get_output(g, deterministic=True) # B x D

        ## compute softmax probs
        #probs,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
        #                    outputs_info=None,
        #                    sequences=[g_out,docidx_var,docidx_mask],
        #                    non_sequences=self.params['W_emb'])
        #predicted_probs = probs.reshape(docidx_var.shape) # B x N
        #probs_val,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
        #                    outputs_info=None,
        #                    sequences=[g_out_val,docidx_var,docidx_mask],
        #                    non_sequences=self.params['W_emb'])
        #predicted_probs_val = probs_val.reshape(docidx_var.shape) # B x N
        #return predicted_probs, predicted_probs_val

        # W is shared with the lookup table
        l_out = L.DenseLayer(g,
                             num_units=vocab_size,
                             W=self.params['W_emb'].T,
                             nonlinearity=lasagne.nonlinearities.softmax,
                             b=None)
        return l_out
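
The skip_connect branch above flattens the (batch, time) axes so the LSTM features and the embeddings can be concatenated on the feature axis, then restores the sequence shape; a standalone version with fixed toy sizes (the sizes are assumptions):

import lasagne.layers as L

BATCH, SEQ, NUM_HIDDEN, EMBED_DIM = 3, 5, 8, 4
l_hid = L.InputLayer((BATCH, SEQ, NUM_HIDDEN))
l_emb = L.InputLayer((BATCH, SEQ, EMBED_DIM))
l_hid_flat = L.ReshapeLayer(l_hid, (-1, NUM_HIDDEN))
l_emb_flat = L.ReshapeLayer(l_emb, (-1, EMBED_DIM))
l_skip = L.ReshapeLayer(L.concat([l_hid_flat, l_emb_flat], axis=1),
                        (BATCH, SEQ, NUM_HIDDEN + EMBED_DIM))
# l_skip.output_shape == (3, 5, 12)
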
    def fused_convnets(self,
                       fusion_level,
                       fusion_type,
                       input_var1=None,
                       input_var2=None,
                       bottleneck_W=None,
                       weights_dir=None):

        net = OrderedDict()
        net['input_rgb'] = InputLayer((None, 4, 128, 128),
                                      input_var=input_var1)
        layer = 0
        for i in range(fusion_level):
            # Add convolution layers
            net['conv_rgb{0:d}'.format(i + 1)] = Conv2DLayer(
                net.values()[layer],
                num_filters=self._net_specs_dict['num_conv_filters'][i],
                filter_size=(self._net_specs_dict['conv_filter_size'][i], ) *
                2,
                pad='same')
            layer += 1
            if self._net_specs_dict['num_conv_layers'] <= 2 and\
                    i != fusion_level - 1:
                # Add pooling layers
                net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                    net.values()[layer], pool_size=(3, 3))
                layer += 1
            else:
                if i < 4:
                    if (i + 1) % 2 == 0 and i != fusion_level - 1:
                        # Add pooling layers
                        net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1
                else:
                    if (i + 1) == 7 and i != fusion_level - 1:
                        # Add pooling layers
                        net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer(
                            net.values()[layer], pool_size=(3, 3))
                        layer += 1

        net['input_depth'] = InputLayer((None, 1, 128, 128),
                                        input_var=input_var2)
        layer += 1
        for i in range(fusion_level):
            # Add convolution layers
            net['conv_depth{0:d}'.format(i + 1)] = Conv2DLayer(
                net.values()[layer],
                num_filters=self._net_specs_dict['num_conv_filters'][i],
                filter_size=(self._net_specs_dict['conv_filter_size'][i], ) *
                2,
                pad='same')
            layer += 1
            if self._net_specs_dict['num_conv_layers'] <= 2 and\
                    i != fusion_level - 1:
                # Add pooling layers
                net['pool_depth{0:d}'.format(i + 1)] = MaxPool2DLayer(
                    net.values()[layer], pool_size=(3, 3))
                layer += 1
            else:
                if i < 4:
                    if (i + 1) % 2 == 0 and i != fusion_level - 1:
                        # Add pooling layers
                        net['pool_depth{0:d}'.format(i+1)] =\
                            MaxPool2DLayer(net.values()[layer],
                                           pool_size=(3, 3))
                        layer += 1
                else:
                    if (i + 1) == 7 and i != fusion_level - 1:
                        # Add pooling layers
                        net['pool_depth{0:d}'.format(i+1)] =\
                            MaxPool2DLayer(net.values()[layer],
                                           pool_size=(3, 3))
                        layer += 1
        # Fuse ConvNets by fusion_level and fusion_type
        if fusion_type == self.MAX:
            net['merge'] =\
                ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)],
                                    net['conv_depth{0:d}'.format(fusion_level)]
                                    ], T.maximum)
            layer += 1
        elif fusion_type == self.SUM:
            net['merge'] =\
                ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)],
                                    net['conv_depth{0:d}'.format(fusion_level)]
                                    ], T.add)
            layer += 1
        elif fusion_type == self.CONCAT:
            net['merge'] = concat([
                net['conv_rgb{0:d}'.format(fusion_level)],
                net['conv_depth{0:d}'.format(fusion_level)]
            ])
            layer += 1
        elif fusion_type == self.CONCATCONV:
            net['concat'] = concat([
                net['conv_rgb{0:d}'.format(fusion_level)],
                net['conv_depth{0:d}'.format(fusion_level)]
            ])
            layer += 1
            net['merge'] = Conv2DLayer(
                net['concat'],
                num_filters=self._net_specs_dict['num_conv_filters'][
                    fusion_level - 1],
                filter_size=(1, 1),
                nonlinearity=None)
            layer += 1
        # Max-pooling to the merged
        if fusion_level in [2, 4, 7]:
            net['pool_merged'] = MaxPool2DLayer(net['merge'], pool_size=(3, 3))
            layer += 1
        # Continue the rest of the convolutional part of the network,
        # if the fusion took place before the last convolutional layer,
        # else just connect the convolutional part with the fully connected
        # part
        if self._net_specs_dict['num_conv_layers'] > fusion_level:
            for i in range(fusion_level,
                           self._net_specs_dict['num_conv_layers']):
                # Add convolution layers
                net['conv_merged{0:d}'.format(i + 1)] = Conv2DLayer(
                    net.values()[layer],
                    num_filters=self._net_specs_dict['num_conv_filters'][i],
                    filter_size=(self._net_specs_dict['conv_filter_size'][i], )
                    * 2,
                    pad='same')
                layer += 1
                if self._net_specs_dict['num_conv_layers'] <= 2:
                    # Add pooling layers
                    net['pool_merged{0:d}'.format(i + 1)] = MaxPool2DLayer(
                        net.values()[layer], pool_size=(3, 3))
                    layer += 1
                else:
                    if i < 4:
                        if (i + 1) % 2 == 0:
                            # Add pooling layers
                            net['pool_merged{0:d}'.format(i+1)] =\
                                MaxPool2DLayer(net.values()[layer],
                                               pool_size=(3, 3))
                            layer += 1
                    else:
                        if (i + 1) == 7:
                            # Add pooling layers
                            net['pool_merged{0:d}'.format(i+1)] =\
                                MaxPool2DLayer(net.values()[layer],
                                               pool_size=(3, 3))
                            layer += 1
        # Fc-layers
        net['fc1'] = DenseLayer(net.values()[layer],
                                self._net_specs_dict['num_fc_units'][0])
        # Add dropout layer
        net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p'])
        net['fc2'] = DenseLayer(net['dropout1'],
                                self._net_specs_dict['num_fc_units'][1])
        # Add dropout layer
        net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p'])
        if bottleneck_W is not None:
            # Add bottleneck layer
            net['bottleneck'] = DenseLayer(net['dropout2'], 30)
            # Add output layer (regression output, squashed by a tanh nonlinearity)
            net['output'] = DenseLayer(
                net['bottleneck'],
                3 * self._num_joints,
                W=bottleneck_W[0:30],
                nonlinearity=lasagne.nonlinearities.tanh)
        else:
            # Add output layer (regression output, squashed by a tanh nonlinearity)
            net['output'] = DenseLayer(
                net['dropout2'],
                3 * self._num_joints,
                nonlinearity=lasagne.nonlinearities.tanh)
        if weights_dir is not None:
            lw = LoadWeights(weights_dir, net)
            lw.load_weights_numpy()
        return net
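
In fused_convnets the CONCATCONV mode stacks the two feature maps along the channel axis and mixes them back down with a 1x1 convolution; the same idea on assumed toy shapes:

from lasagne.layers import InputLayer, Conv2DLayer, concat

rgb_maps = InputLayer((None, 32, 16, 16))
depth_maps = InputLayer((None, 32, 16, 16))
merged = Conv2DLayer(concat([rgb_maps, depth_maps]),   # (None, 64, 16, 16)
                     num_filters=32, filter_size=(1, 1),
                     nonlinearity=None)                # (None, 32, 16, 16)
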
def build_generator_lstm(input_var, noise_size, cond_var=None, n_conds=0,
                         arch='lstm', with_BatchNorm=True, batch_size=None,
                         n_steps=None):
    from lasagne.layers import (
        InputLayer, DenseLayer, LSTMLayer, ReshapeLayer, DimshuffleLayer,
        concat, ExpressionLayer, NonlinearityLayer, DropoutLayer)

    from lasagne.init import Constant, HeNormal
    from lasagne.nonlinearities import rectify, softmax
    non_lin = rectify

    layer = InputLayer(
        shape=(batch_size, n_steps, noise_size), input_var=input_var)
    if cond_var is not None:
        layer = BatchNorm(DenseLayer(
            layer, noise_size, nonlinearity=non_lin), with_BatchNorm)
        layer = concat(
            [layer, InputLayer(shape=(batch_size, n_steps, n_conds),
                               input_var=cond_var)])
    if arch == 'lstm':
        layer = batch_norm(DenseLayer(layer, 1024, num_leading_axes=2))
        # recurrent layers for bidirectional network
        l_forward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False), with_BatchNorm)
        l_backward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False, backwards=True), with_BatchNorm)
        layer = concat([l_forward_noise, l_backward_noise], axis=2)
        # dense layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, num_leading_axes=2), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 128, num_leading_axes=2), with_BatchNorm)
        # reshape to apply softmax per timestep
        layer = ReshapeLayer(layer, (-1, [2]))
        layer = NonlinearityLayer(layer, softmax)
        layer = ReshapeLayer(layer, (input_var.shape[0], -1, [1]))
        layer = DimshuffleLayer(layer, (0, 'x', 2, 1))
        layer = ExpressionLayer(layer, lambda X: X*2 - 1)
    elif arch == 1:
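        # NOTE: this branch relies on params, gate_params and cell_params being
        # defined in the enclosing scope of the original repository; as excerpted
        # here it is not self-contained.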
        # input layers
        l_in = InputLayer(
            shape=params['input_shape'], input_var=params['input_var'],
            name='g_in')
        l_noise = InputLayer(
            shape=params['noise_shape'], input_var=params['noise_var'],
            name='g_noise')
        l_cond = InputLayer(
            shape=params['cond_shape'], input_var=params['cond_var'],
            name='g_cond')
        l_mask = InputLayer(
            shape=params['mask_shape'], input_var=params['mask_var'],
            name='g_mask')

        # recurrent layers for bidirectional network
        l_forward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][0])
        l_forward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][1])

        l_backward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][0])
        l_backward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][1])

        # concatenate output of forward and backward layers
        l_lstm_concat = concat(
            [l_forward_data, l_forward_noise, l_backward_data,
             l_backward_noise], axis=2)

        # dense layer on output of data and noise lstms, w/dropout
        l_lstm_dense = DenseLayer(
            DropoutLayer(l_lstm_concat, p=0.5),
            num_units=params['n_units'][1], num_leading_axes=2,
            W=HeNormal(gain='relu'), b=Constant(0.1),
            nonlinearity=params['non_linearities'][2])

        # batch norm for lstm dense
        # l_lstm_dense = lasagne.layer.BatchNorm(l_lstm_dense)

        # concatenate the dense layer of the lstm with the condition
        l_lstm_cond_concat = concat(
            [l_lstm_dense, l_cond], axis=2)

        # dense layer with dense layer lstm and condition, w/dropout
        l_out = DenseLayer(
            DropoutLayer(l_lstm_cond_concat, p=0.5),
            num_units=params['n_units'][2],
            num_leading_axes=2,
            W=HeNormal(gain=1.0), b=Constant(0.1),
            nonlinearity=params['non_linearities'][3])
    elif arch == 2:
        raise Exception("arch 2 not implemented")
    elif arch == 3:
        raise Exception("arch 2 not implemented")

    print("Generator output:", layer.output_shape)
    return layer
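
build_generator_lstm calls a BatchNorm(...) helper defined elsewhere in the original repository; a plausible minimal stand-in (an assumption, not the original definition) would conditionally wrap a layer in batch normalisation:

from lasagne.layers import batch_norm

def BatchNorm(layer, use_batch_norm=True):
    # Wrap the layer in batch normalisation only when requested.
    return batch_norm(layer) if use_batch_norm else layer
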
    def build_network(self, K, vocab_size, W_init):

        l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
        l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
        l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
        l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
        l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
        l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN),
                               input_var=self.inps[8])
        l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN),
                                 input_var=self.inps[9])
        l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])

        doc_shp = self.inps[1].shape
        qry_shp = self.inps[3].shape

        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=self.embed_dim,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=self.embed_dim,
                                    W=l_docembed.W)
        l_qembed = L.ReshapeLayer(
            l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
        l_fembed = L.EmbeddingLayer(l_featin, input_size=2,
                                    output_size=2)  # B x N x 2

        if self.train_emb == 0:
            l_docembed.params[l_docembed.W].remove('trainable')

        # char embeddings
        if self.use_chars:
            l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars,
                                        2 * self.char_dim)  # T x L x D
            l_fgru = L.GRULayer(l_lookup,
                                self.char_dim,
                                grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask,
                                gradient_steps=GRAD_STEPS,
                                precompute_input=True,
                                only_return_final=True)
            l_bgru = L.GRULayer(l_lookup,
                                2 * self.char_dim,
                                grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask,
                                gradient_steps=GRAD_STEPS,
                                precompute_input=True,
                                backwards=True,
                                only_return_final=True)  # T x 2D
            l_fwdembed = L.DenseLayer(l_fgru,
                                      self.embed_dim / 2,
                                      nonlinearity=None)  # T x DE/2
            l_bckembed = L.DenseLayer(l_bgru,
                                      self.embed_dim / 2,
                                      nonlinearity=None)  # T x DE/2
            l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
            l_docchar_embed = IndexLayer([l_doctokin, l_embed])  # B x N x DE/2
            l_qchar_embed = IndexLayer([l_qtokin, l_embed])  # B x Q x DE/2

            l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
            l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

        l_fwd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             only_return_final=False)
        l_bkd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True,
                             only_return_final=False)

        l_q = L.ConcatLayer([l_fwd_q, l_bkd_q])  # B x Q x 2D
        q = L.get_output(l_q)  # B x Q x 2D
        q = q[T.arange(q.shape[0]), self.inps[12], :]  # B x 2D

        l_qs = [l_q]
        for i in range(K - 1):
            l_fwd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True,
                                     backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1],
                               axis=2)  # B x N x DE

            l_fwd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True,
                                   backwards=True)

            l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1],
                                    axis=2)  # B x Q x DE
            l_qs.append(l_q_c_1)

            qd = L.get_output(l_q_c_1)  # B x Q x DE
            dd = L.get_output(l_doc_1)  # B x N x DE
            M = T.batched_dot(dd, qd.dimshuffle((0, 2, 1)))  # B x N x Q
            alphas = T.nnet.softmax(
                T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))
            alphas_r = T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2])) \
                * self.inps[7][:, np.newaxis, :]  # B x N x Q
            alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
            q_rep = T.batched_dot(alphas_r, qd)  # B x N x DE

            l_q_rep_in = L.InputLayer(shape=(None, None, 2 * self.nhidden),
                                      input_var=q_rep)
            l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)
            l_doce = L.dropout(l_doc_2_in, p=self.dropout)  # B x N x DE

        if self.use_feat:
            l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2
        l_fwd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        d = L.get_output(l_doc)  # B x N x 2D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(p) * self.inps[10]
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final = T.batched_dot(pm, self.inps[4])

        dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
        p = T.batched_dot(dv, q)  # B x N
        pm = T.nnet.softmax(p) * self.inps[10]
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final_v = T.batched_dot(pm, self.inps[4])

        return final, final_v, l_doc, l_qs, l_docembed.W
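Note: the attention block inside the K-loop above can be read in isolation. Below is a minimal stand-alone sketch of that step under illustrative names (`doc`, `que` and `que_mask` are placeholders, not variables from the original class):

import numpy as np
import theano.tensor as T

def query_attention(doc, que, que_mask):
    """doc: B x N x D, que: B x Q x D, que_mask: B x Q  ->  B x N x D."""
    M = T.batched_dot(doc, que.dimshuffle((0, 2, 1)))              # B x N x Q similarities
    alphas = T.nnet.softmax(
        T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))       # softmax over query tokens
    alphas = (T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2]))
              * que_mask[:, np.newaxis, :])                        # zero out query padding
    alphas = alphas / alphas.sum(axis=2)[:, :, np.newaxis]         # renormalise after masking
    return T.batched_dot(alphas, que)                              # query-aware token vectors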
def build_generator(input_var, noise_size, cond_var=None, n_conds=0, arch=0,
                    with_BatchNorm=True, batch_size=None, n_steps=None):
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer, concat
    from lasagne.layers import Upscale2DLayer, Conv2DLayer, batch_norm
    from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    from lasagne.nonlinearities import LeakyRectify, rectify
    from lasagne.init import GlorotUniform, Normal, Orthogonal

    # non_lin = LeakyRectify(0.01)
    non_lin = rectify
    # init = Orthogonal(np.sqrt(2/(1+0.01**2)))
    init = Normal(0.02, 0.0)
    # init = GlorotUniform()

    layer = InputLayer(shape=(batch_size, noise_size), input_var=input_var)
    if cond_var is not None:
        layer = BatchNorm(DenseLayer(
            layer, noise_size, nonlinearity=non_lin), with_BatchNorm)
        layer = concat([
            layer, InputLayer(shape=(batch_size, n_conds), input_var=cond_var)])
    if arch == 'dcgan':
        # DCGAN
        layer = BatchNorm(DenseLayer(
            layer, 1024*4*4, W=init, b=None, nonlinearity=non_lin), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 1024, 4, 4))
        layer = BatchNorm(Deconv2DLayer(
            layer, 512, 5, stride=2, crop=(2, 2), W=init, b=None,
            output_size=8, nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, 5, stride=2, crop=(2, 2), W=init, b=None,
            output_size=16, nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 128, 5, stride=2, crop=(2, 2), W=init, b=None,
            output_size=32, nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 64, 5, stride=2, crop=(2, 2), W=init, b=None,
            output_size=64, nonlinearity=non_lin), with_BatchNorm)
        layer = Deconv2DLayer(
            layer, 1, 5, stride=2, crop=(2, 2), W=init, b=None,
            output_size=128, nonlinearity=tanh_temperature)
    elif arch == 'mnist':
        # Jan Schluechter MNIST generator
        # fully-connected layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(
            layer, 1024*8*8, W=init, b=None), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 1024, 8, 8))
        # fractional-stride convolutions
        layer = BatchNorm(Deconv2DLayer(
            layer, 512, 5, stride=2, crop='same', W=init, b=None,
            output_size=16, nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, 5, stride=2, crop='same', W=init, b=None,
            output_size=32, nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 128, 5, stride=2, crop='same', W=init, b=None,
            output_size=64, nonlinearity=non_lin), with_BatchNorm)
        layer = Deconv2DLayer(
            layer, 1, 5, stride=2, crop='same', W=init, b=None,
            output_size=128, nonlinearity=tanh_temperature)
    elif arch == 'cont-enc':
        # build generator from concatenated prefix and noise features
        layer = ReshapeLayer(layer, ([0], layer.output_shape[1], 1, 1))
        layer = BatchNorm(Deconv2DLayer(
            layer, 1024, 4, stride=1, crop=0, W=init), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 512, 4, stride=2, crop=1, W=init), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, 4, stride=2, crop=1, W=init), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 128, 4, stride=2, crop=1, W=init), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 128, 4, stride=2, crop=1, W=init), with_BatchNorm)
        layer = Deconv2DLayer(
            layer, 1, 4, stride=2, crop=1, W=init,
            nonlinearity=tanh_temperature)
    elif arch == 'lsgan':
        layer = batch_norm(DenseLayer(layer, 1024))
        layer = batch_norm(DenseLayer(layer, 1024*8*8))
        layer = ReshapeLayer(layer, ([0], 1024, 8, 8))
        layer = batch_norm(Deconv2DLayer(
            layer, 256, 5, stride=2, crop='same', output_size=16))
        layer = batch_norm(Deconv2DLayer(
            layer, 256, 5, stride=2, crop='same', output_size=32))
        layer = batch_norm(Deconv2DLayer(
            layer, 256, 5, stride=2, crop='same', output_size=64))
        layer = Deconv2DLayer(
            layer, 1, 5, stride=2, crop='same', output_size=128,
            nonlinearity=tanh_temperature)
    elif arch == 2:
        # non-overlapping transposed convolutions
        # fully-connected layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(layer, 256*36*36), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 256, 36, 36))
        # two fractional-stride convolutions
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, 4, stride=2, crop='full', b=None, nonlinearity=non_lin),
            with_BatchNorm)
        layer = Deconv2DLayer(
            layer, 1, 8, stride=2, crop='full', b=None,
            nonlinearity=tanh_temperature)
    elif arch == 3:
        # resize-convolution, more full layer weights less convolutions
        # fully-connected layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(layer, 32*68*68), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 32, 68, 68))
        # resize-convolutions
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 3, stride=1, pad='valid'), with_BatchNorm)
        layer = Upscale2DLayer(layer, (2, 2))
        layer = Conv2DLayer(
            layer, 1, 5, stride=1, pad='valid', nonlinearity=tanh_temperature)
    elif arch == 4:
        # resize-convolution, less full layer weights more convolutions
        # fully-connected layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(layer, 128*18*18), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 128, 18, 18))
        # resize-convolutions
        layer = Upscale2DLayer(layer, (2, 2), mode='bilinear')
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 3, stride=1, pad='valid', nonlinearity=non_lin),
            with_BatchNorm)
        layer = Upscale2DLayer(layer, (2, 2), mode='bilinear')
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 3, stride=1, pad='valid', nonlinearity=non_lin),
            with_BatchNorm)
        layer = Upscale2DLayer(layer, (2, 2), mode='bilinear')
        layer = Conv2DLayer(
            layer, 1, 5, stride=1, pad='valid',
            nonlinearity=tanh_temperature)
    elif arch == 'crepe_up':
        # CREPE transposed with upscaling
        # fully-connected layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(layer, 2**15*1*3), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 2**15, 1, 3))
        # temporal convolutions
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = Upscale2DLayer(layer, (1, 3), mode='repeat')
        layer = BatchNorm(Deconv2DLayer(
            layer, 512, (1, 9), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = Upscale2DLayer(layer, (1, 3), mode='repeat')
        layer = Deconv2DLayer(
            layer, 1, (128, 6), stride=1, crop=0, W=init, b=None,
            nonlinearity=tanh_temperature)
    elif arch == 'crepe_noup_a':
        # CREPE transposed no upscaling
        # fully-connected layer
        layer = BatchNorm(DenseLayer(
            layer, 1024, W=init, b=None), with_BatchNorm)
        # project and reshape
        layer = BatchNorm(DenseLayer(
            layer, 1024*1*3, W=init, b=None), with_BatchNorm)
        layer = ReshapeLayer(layer, ([0], 1024, 1, 3))
        # temporal convolutions
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 512, (1, 7), stride=1, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = BatchNorm(Deconv2DLayer(
            layer, 1024, (128, 7), stride=3, crop=0, W=init, b=None,
            nonlinearity=non_lin), with_BatchNorm)
        layer = Deconv2DLayer(
            layer, 1, (1, 8), stride=1, crop=0, W=init, b=None,
            nonlinearity=tanh_temperature)
    elif arch == 'crepe_noup_b':
        # CREPE transposed no upscaling
        # fully-connected layer
        layer = BatchNorm(DenseLayer(layer, 1024))
        # project and reshape
        layer = BatchNorm(DenseLayer(layer, 1024*1*3))
        layer = ReshapeLayer(layer, ([0], 1024, 1, 3))
        # temporal convolutions
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0,
            nonlinearity=non_lin))
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin))
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin))
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin))
        layer = BatchNorm(Deconv2DLayer(
            layer, 256, (1, 3), stride=3, crop=0, nonlinearity=non_lin))
        layer = Deconv2DLayer(
            layer, 512, (1, 9), stride=1, crop=0, nonlinearity=non_lin)
        layer = Deconv2DLayer(
            layer, 1, (128, 8), stride=3, crop=0, nonlinearity=tanh_temperature)
    else:
        return None

    print("Generator output:", layer.output_shape)
    return layer
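The `BatchNorm(...)` helper (like the `tanh_temperature` nonlinearity) used throughout `build_generator` is defined elsewhere in the project. A minimal sketch of what such a conditional wrapper might look like, assuming it simply toggles lasagne's standard `batch_norm`:

from lasagne.layers import batch_norm

def BatchNorm(layer, use_batch_norm=True):
    # Sketch only: apply lasagne's batch normalisation when the flag is set,
    # otherwise return the layer unchanged. The real helper lives outside
    # this excerpt and may differ.
    return batch_norm(layer) if use_batch_norm else layer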
Example #35
def reference_model():
    net = {}
    net['data'] = InputLayer(shape=(None, 3, 227, 227))

    # conv1
    net['conv1'] = Conv2DLayer(
        net['data'],
        num_filters=96,
        filter_size=(11, 11),
        stride=4,
        nonlinearity=lasagne.nonlinearities.rectify)

    
    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)

    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'],
                                                     n=5,
                                                     alpha=0.0001/5.0,
                                                     beta=0.75,
                                                     k=1)

    # conv2
    # The caffe reference model uses a parameter called group.
    # This parameter splits input to the convolutional layer.
    # The first half of the filters operate on the first half
    # of the input from the previous layer. Similarly, the
    # second half operate on the second half of the input.
    #
    # Lasagne does not have this group parameter, but we can
    # do it ourselves.
    #
    # see https://github.com/BVLC/caffe/issues/778
    # also see https://code.google.com/p/cuda-convnet/wiki/LayerParams
    
    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48, 96), axis=1)

    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)

    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)

    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)
    
    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'],
                                                     n=5,
                                                     alpha=0.0001/5.0,
                                                     beta=0.75,
                                                     k=1)
    
    # conv3
    # no group
    net['conv3'] = Conv2DLayer(net['norm2'],
                               num_filters=384,
                               filter_size=(3, 3),
                               pad=1)

    # conv4
    # group = 2
    net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192, 384), axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)
    
    # conv5
    # group 2
    net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192, 384), axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)

    # pool 5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)

    # fc6
    net['fc6'] = DenseLayer(
            net['pool5'],num_units=4096,
            nonlinearity=lasagne.nonlinearities.rectify)

    # fc7
    net['fc7'] = DenseLayer(
        net['fc6'],
        num_units=4096,
        nonlinearity=lasagne.nonlinearities.rectify)

    # fc8
    net['fc8'] = DenseLayer(
        net['fc7'],
        num_units=1000,
        nonlinearity=lasagne.nonlinearities.softmax)
    
    return net
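As an aside, the slice/convolve/concatenate pattern used for conv2, conv4 and conv5 above can be factored into a small helper. This is an illustrative sketch, not part of the reference model; `incoming` and `n_groups` are placeholder names:

from lasagne.layers import Conv2DLayer, SliceLayer, concat

def grouped_conv(incoming, num_filters, filter_size, pad=1, n_groups=2):
    # Emulate caffe's "group" parameter: split the input channels into
    # n_groups slices, convolve each slice separately, then concatenate.
    step = incoming.output_shape[1] // n_groups
    parts = []
    for g in range(n_groups):
        part = SliceLayer(incoming, indices=slice(g * step, (g + 1) * step), axis=1)
        parts.append(Conv2DLayer(part,
                                 num_filters=num_filters // n_groups,
                                 filter_size=filter_size,
                                 pad=pad))
    return concat(parts, axis=1)

# e.g. the conv2 block above would correspond to:
# net['conv2'] = grouped_conv(net['norm1'], 256, (5, 5), pad=2)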
Example #36
    def get_params_internal(self, **tags):
        return L.get_all_params(
            L.concat(self._output_layers),
            **tags
        )  # , key=lambda x: x.name)
Example #37
def build_model(x=None, layer='fc8', shape=(None, 3, 227, 227), up_scale=4):
    net = {'data': InputLayer(shape=shape, input_var=x)}
    net['data_s'] = Upscale2DLayer(net['data'], up_scale)
    net['conv1'] = Conv2DLayer(net['data_s'],
                               num_filters=96,
                               filter_size=(11, 11),
                               stride=4,
                               nonlinearity=lasagne.nonlinearities.rectify)

    if layer == 'conv1':
        return net

    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)

    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'],
                                                     n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75,
                                                     k=1)

    # conv2
    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'],
                                    indices=slice(48, 96),
                                    axis=1)

    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'],
                                     num_filters=128,
                                     filter_size=(5, 5),
                                     pad=2)

    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)
    if layer == 'conv2':
        return net
    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)

    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'],
                                                     n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75,
                                                     k=1)
    # conv3
    # no group
    net['conv3'] = Conv2DLayer(net['norm2'],
                               num_filters=384,
                               filter_size=(3, 3),
                               pad=1)
    if layer == 'conv3':
        return net

    # conv4
    net['conv4_data1'] = SliceLayer(net['conv3'],
                                    indices=slice(0, 192),
                                    axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'],
                                    indices=slice(192, 384),
                                    axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'],
                                     num_filters=192,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)
    if layer == 'conv4':
        return net

    # conv5
    # group 2
    net['conv5_data1'] = SliceLayer(net['conv4'],
                                    indices=slice(0, 192),
                                    axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'],
                                    indices=slice(192, 384),
                                    axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'],
                                     num_filters=128,
                                     filter_size=(3, 3),
                                     pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)
    if layer == 'conv5':
        return net

    # pool 5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)

    # fc6
    net['fc6'] = DenseLayer(net['pool5'],
                            num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    if layer == 'fc6':
        return net

    # fc7
    net['fc7'] = DenseLayer(net['fc6'],
                            num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    if layer == 'fc7':
        return net

    # fc8
    net['fc8'] = DenseLayer(net['fc7'],
                            num_units=1000,
                            nonlinearity=lasagne.nonlinearities.softmax)
    if layer == 'fc8':
        # st()
        return net
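A hedged usage sketch for `build_model`: features up to the requested layer can be read out of the returned dict. The tensor `x` below is illustrative and not defined in the original snippet:

import lasagne
import theano.tensor as T

x = T.tensor4('x')                          # batch x 3 x 227 x 227 input images
net = build_model(x=x, layer='conv5')       # layers up to and including conv5
conv5_feats = lasagne.layers.get_output(net['conv5'])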
def build_critic(input_var=None, cond_var=None, n_conds=0, arch=0,
                 with_BatchNorm=True, loss_type='wgan'):
    from lasagne.layers import (
        InputLayer, Conv2DLayer, DenseLayer, MaxPool2DLayer, concat,
        dropout, flatten, batch_norm)
    from lasagne.nonlinearities import rectify, LeakyRectify
    from lasagne.init import GlorotUniform  # Normal
    lrelu = LeakyRectify(0.2)
    layer = InputLayer(
        shape=(None, 1, 128, 128), input_var=input_var, name='d_in_data')
    # init = Normal(0.02, 0.0)
    init = GlorotUniform()

    if cond_var is not None:
        # class: from data or from generator input
        layer_cond = InputLayer(
            shape=(None, n_conds), input_var=cond_var, name='d_in_condition')
        layer_cond = BatchNorm(DenseLayer(
            layer_cond, 1024, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
    if arch == 'dcgan':
        # DCGAN inspired
        layer = BatchNorm(Conv2DLayer(
            layer, 32, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
    elif arch == 'cont-enc':
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 512, 4, stride=2, pad=1, W=init, nonlinearity=lrelu),
            with_BatchNorm)
    elif arch == 'mnist':
        # Jan Schluechter's MNIST discriminator
        # convolution layers
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 128, 5, stride=2, pad='same', W=init, b=None,
            nonlinearity=lrelu), with_BatchNorm)
        # layer = BatchNorm(Conv2DLayer(
        #     layer, 128, 5, stride=2, pad='same', W=init, b=None,
        #      nonlinearity=lrelu), with_BatchNorm)
        # fully-connected layer
        # layer = BatchNorm(DenseLayer(
        #     layer, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm)
    elif arch == 'lsgan':
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
        layer = batch_norm(Conv2DLayer(
            layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu))
    elif arch == 'crepe':
        # CREPE
        # form words from sequence of characters
        layer = BatchNorm(Conv2DLayer(
            layer, 1024, (128, 7), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 7-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 512, (1, 7), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = MaxPool2DLayer(layer, (1, 3))
        # temporal convolution, 3-gram
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = BatchNorm(Conv2DLayer(
            layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu),
            with_BatchNorm)
        layer = flatten(layer)
        # fully-connected layers
        layer = dropout(DenseLayer(
            layer, 1024, W=init, b=None, nonlinearity=rectify))
        layer = dropout(DenseLayer(
            layer, 1024, W=init, b=None, nonlinearity=rectify))
    else:
        raise Exception("Model architecture {} is not supported".format(arch))

    if cond_var is not None:
        layer = DenseLayer(layer, 1024, nonlinearity=lrelu, b=None)
        layer = concat([layer, layer_cond])

    # output layer (linear and without bias)
    layer = DenseLayer(layer, 1, b=None, nonlinearity=None)
    print("Critic output:", layer.output_shape)
    return layer
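The critic ends in a single linear unit without a bias, which fits the Wasserstein setup implied by `loss_type='wgan'`. A sketch of how such critic scores are typically turned into losses follows; it is not taken from this code base, and `real_batch`/`fake_batch` are illustrative tensors:

from lasagne.layers import get_output

def wgan_losses(critic, real_batch, fake_batch):
    # Critic scores for real and generated samples.
    score_real = get_output(critic, inputs=real_batch)
    score_fake = get_output(critic, inputs=fake_batch)
    critic_loss = score_fake.mean() - score_real.mean()   # critic pushes the scores apart
    generator_loss = -score_fake.mean()                   # generator raises fake scores
    return critic_loss, generator_loss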
Example #39
    def get_char2word(self, ic, avg=False):
        suf = '_avg' if avg else ''
        ec = L.EmbeddingLayer(
            ic,
            self.args.vc,
            self.args.nc,
            name='ec' + suf,
            W=HeNormal() if not avg else Constant())  # (100, 24, 32, 16)
        ec.params[ec.W].remove('regularizable')

        if self.args.char_model == 'CNN':
            lds = L.dimshuffle(ec, (0, 3, 1, 2))  # (100, 16, 24, 32)
            ls = []
            for n in self.args.ngrams:
                lconv = L.Conv2DLayer(
                    lds,
                    self.args.nf, (1, n),
                    untie_biases=True,
                    W=HeNormal('relu') if not avg else Constant(),
                    name='conv_%d' % n + suf)  # (100, 64/4, 24, 32-n+1)
                lpool = L.MaxPool2DLayer(
                    lconv, (1, self.args.max_len - n + 1))  # (100, 64, 24, 1)
                lpool = L.flatten(lpool, outdim=3)  # (100, 16, 24)
                lpool = L.dimshuffle(lpool, (0, 2, 1))  # (100, 24, 16)
                ls.append(lpool)
            xc = L.concat(ls, axis=2)  # (100, 24, 64)
            return xc

        elif self.args.char_model == 'LSTM':
            ml = L.ExpressionLayer(
                ic, lambda x: T.neq(x, 0))  # mask layer (100, 24, 32)
            ml = L.reshape(ml, (-1, self.args.max_len))  # (2400, 32)

            gate_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal())
            cell_params = L.recurrent.Gate(W_in=Orthogonal(),
                                           W_hid=Orthogonal(),
                                           W_cell=None,
                                           nonlinearity=tanh)

            lstm_in = L.reshape(
                ec, (-1, self.args.max_len, self.args.nc))  # (2400, 32, 16)
            lstm_f = L.LSTMLayer(
                lstm_in,
                self.args.nw // 2,
                mask_input=ml,
                grad_clipping=10.,
                learn_init=True,
                peepholes=False,
                precompute_input=True,
                ingate=gate_params,
                forgetgate=gate_params,
                cell=cell_params,
                outgate=gate_params,
                # unroll_scan=True,
                only_return_final=True,
                name='forward' + suf)  # (2400, 64)
            lstm_b = L.LSTMLayer(
                lstm_in,
                self.args.nw // 2,
                mask_input=ml,
                grad_clipping=10.,
                learn_init=True,
                peepholes=False,
                precompute_input=True,
                ingate=gate_params,
                forgetgate=gate_params,
                cell=cell_params,
                outgate=gate_params,
                # unroll_scan=True,
                only_return_final=True,
                backwards=True,
                name='backward' + suf)  # (2400, 64)
            remove_reg(lstm_f)
            remove_reg(lstm_b)
            if avg:
                set_zero(lstm_f)
                set_zero(lstm_b)
            xc = L.concat([lstm_f, lstm_b], axis=1)  # (2400, 128)
            xc = L.reshape(xc,
                           (-1, self.args.sw, self.args.nw))  # (100, 24, 256)
            return xc
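A hedged usage sketch for `get_char2word`: the incoming layer is assumed to carry integer character ids of shape (batch, words, chars), and both the CNN and the LSTM branch return one vector per word. The concrete sizes and the `model` instance below are illustrative only:

import theano.tensor as T
import lasagne.layers as L

chars = T.itensor3('chars')                       # batch x words x chars (integer ids)
l_chars = L.InputLayer((None, 24, 32), input_var=chars)
# xc = model.get_char2word(l_chars)               # -> (batch, 24, word_dim) word vectors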
Example #40
def test_memory(game_title='SpaceInvaders-v0',
                n_parallel_games=3,
                replay_seq_len=2):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None,) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    memory_dict = OrderedDict([])

    ### Window
    window_size = 3

    # prev state input
    prev_window = InputLayer((None, window_size) + tuple(observation_reshape.output_shape[1:]),
                             name="previous window state")
    

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")
    
    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None,) + window.output_shape[2:])

    
    memory_dict[window] = prev_window
    
    ### Stack
    # prev stack
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack")
    stack_controls = DenseLayer(observation_reshape, 3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="prev_stack")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    
    ### RNN preset
    prev_rnn = InputLayer((None, 16),
                          name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ### GRU preset
    prev_gru = InputLayer((None, 16),
                          name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ### GRUMemoryLayer
    prev_gru1 = InputLayer((None, 15),
                           name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1
    
    # LSTM with peepholes
    prev_lstm0_cell = InputLayer((None, 13),
                                 name="previous LSTMCell hidden state [with peepholes]")
    prev_lstm0_out = InputLayer((None, 13),
                                name="previous LSTMCell output state [with peepholes]")
    new_lstm0_cell, new_lstm0_out = LSTMCell(prev_lstm0_cell, prev_lstm0_out,
                                             input_or_inputs=observation_reshape,
                                             peepholes=True,
                                             name="newLSTM1 [with peepholes]")
    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out


    # LSTM without peepholes
    prev_lstm1_cell = InputLayer((None, 14),
                                 name="previous LSTMCell hidden state [no peepholes]")
    prev_lstm1_out = InputLayer((None, 14),
                                name="previous LSTMCell output state [no peepholes]")
    new_lstm1_cell, new_lstm1_out = LSTMCell(prev_lstm1_cell, prev_lstm1_out,
                                             input_or_inputs=observation_reshape,
                                             peepholes=False,
                                             name="newLSTM1 [no peepholes]")
    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out
    
    ## concat everything
    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([flatten(window_max), stack_top, new_rnn, new_gru,
                         new_gru1, new_lstm0_out, new_lstm1_out])
    
    
    

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc


    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer,
                  memory_dict,
                  q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, outputs, etc.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [np.zeros((batch_size,) + tuple(mem.output_shape[1:]),
                                      dtype='float32')
                             for mem in agent.agent_states]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and ads them into the pool
        throwing the old ones away entirely for simplicity"""

        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor, is_alive_tensor, preceding_memory_states)

    # load the first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, leaving the rewards unscaled works well enough.


    elwise_mse_loss = qlearning.get_elementwise_objective(q_values_sequence,
                                                          env.actions[0],
                                                          scaled_reward_seq,
                                                          env.is_alive,
                                                          gamma_or_gammas=0.99)

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10 ** -4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward], updates=updates)
    evaluation_fun = theano.function([], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " % (
            epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))