Example #1
0
def test_transform_identity():
    from lasagne.layers import InputLayer, TransformerLayer
    from lasagne.utils import floatX
    from theano.tensor import constant
    batchsize = 10
    l_in = InputLayer((batchsize, 3, 28, 28))
    l_loc = InputLayer((batchsize, 6))
    layer = TransformerLayer(l_in, l_loc)
    inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape))
    thetas = floatX(np.tile([1, 0, 0, 0, 1, 0], (batchsize, 1)))
    outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval()
    np.testing.assert_allclose(inputs, outputs, rtol=1e-6)
Example #2
0
def test_transform_identity():
    from lasagne.layers import InputLayer, TransformerLayer
    from lasagne.utils import floatX
    from theano.tensor import constant
    batchsize = 10
    l_in = InputLayer((batchsize, 3, 28, 28))
    l_loc = InputLayer((batchsize, 6))
    layer = TransformerLayer(l_in, l_loc)
    inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape))
    thetas = floatX(np.tile([1, 0, 0, 0, 1, 0], (batchsize, 1)))
    outputs = layer.get_output_for([constant(inputs), constant(thetas)]).eval()
    np.testing.assert_allclose(inputs, outputs, rtol=1e-6)
Example #3
0
def test_model(input_shape):
    b = np.zeros((2, 3), dtype=theano.config.floatX)
    b[0, 0] = 1
    b[1, 1] = 1
    b = b.flatten()
    l_in = InputLayer(shape=(None, input_shape[1], input_shape[2],
                             input_shape[3]))
    l_dense = DenseLayer(l_in, num_units=6, W=lasagne.init.Constant(0.0), b=b)
    l_transform = TransformerLayer(l_in, l_dense, downsample_factor=1)
    return l_transform
Example #4
0
def construct_gen(noise_1, noise_2, batch_size=10):
    # There are two time steps considered for this model, so two LSTMs
    # Reshape noises
    noise1_rshp = noise_1.dimshuffle(0, 'x', 1)
    noise2_rshp = noise_2.dimshuffle(0, 'x', 1)
    lstm1_inp = InputLayer((None, 1, 100), input_var=noise1_rshp)
    lstm2_inp = InputLayer((None, 1, 100), input_var=noise2_rshp)

    lstm2 = ExposedLSTMLayer(lstm2_inp, 100)
    lstm2_h = SliceLayer(lstm2, indices=slice(num_units, None), axis=-1)
    lstm2_reshape = ReshapeLayer(lstm2_h, (batch_size, 100))

    print("LSTM2's output is " + str(lstm2_reshape.output_shape))

    build_bg = gen_bg(lstm1_inp)
    build_gfc = gen_fc(lstm2_reshape)
    build_gif = gen_fi(build_gfc)
    build_gfmask = gen_fmask(build_gfc)

    # Affine transformation and pasting with bg
    a_t = DenseLayer(lstm2_reshape, num_units=6, W=w1)  #6 dim output
    m_t_hat = NonlinearityLayer(PadLayer(
        TransformerLayer(build_gfmask, a_t, downsample_factor=2), 8),
                                nonlinearity=tanh)
    f_t_hat = NonlinearityLayer(PadLayer(
        TransformerLayer(build_gif, a_t, downsample_factor=2), 8),
                                nonlinearity=tanh)

    prior = ElemwiseMergeLayer([m_t_hat, f_t_hat],
                               merge_function=tensor.mul,
                               broadcastable=1)
    posterior = ElemwiseMergeLayer([ComplimentLayer(m_t_hat), build_bg],
                                   merge_function=tensor.mul,
                                   broadcastable=1)

    gen_image = ElemwiseSumLayer([prior, posterior])

    return gen_image
Example #5
0
def build_st_network(b_size, input_shape, withdisc=True):
    # General Params
    num_filters = 64
    filter_size = (3, 3)
    pool_size = (2, 2)

    # SP Param
    b = np.zeros((2, 3), dtype=theano.config.floatX)
    b[0, 0] = 1
    b[1, 1] = 1
    b = b.flatten()  # identity transform

    # Localization Network
    l_in = InputLayer(shape=(None, input_shape[1], input_shape[2],
                             input_shape[3]))

    l_conv1 = Conv2DLayer(l_in,
                          num_filters=num_filters,
                          filter_size=filter_size)

    l_pool1 = MaxPool2DLayer(l_conv1, pool_size=pool_size)

    l_conv2 = Conv2DLayer(l_pool1,
                          num_filters=num_filters,
                          filter_size=filter_size)

    l_pool2 = MaxPool2DLayer(l_conv2, pool_size=pool_size)

    l_loc = DenseLayer(l_pool2, num_units=64, W=lasagne.init.HeUniform('relu'))

    l_param_reg = DenseLayer(l_loc,
                             num_units=6,
                             b=b,
                             nonlinearity=lasagne.nonlinearities.linear,
                             W=lasagne.init.Constant(0.0),
                             name='param_regressor')

    if withdisc:
        l_dis = DiscreteLayer(l_param_reg,
                              start=Constant(-3.),
                              stop=Constant(3.),
                              linrange=Constant(50.))
    else:
        l_dis = l_param_reg

    # Transformer Network
    l_trans = TransformerLayer(l_in, l_dis, downsample_factor=1.0)

    final = ReshapeLayer(l_trans, shape=([0], -1))
    return final
Example #6
0
    def build_scaled_model(self, previous_layer):
        from lasagne.layers import TransformerLayer

        b = np.zeros((2, 3), dtype='float32')
        b[0, 0] = 7.0
        b[1, 1] = 7.0
        b = b.flatten()  # identity transform
        W = lasagne.init.Constant(0.0)
        scalenet = {}
        scalenet['input'] = previous_layer
        scalenet['scale_init'] = lasagne.layers.DenseLayer(scalenet['input'], num_units=6, W=W, b=b, nonlinearity=None)
        scalenet['scale'] = TransformerLayer(scalenet['input'], scalenet['scale_init'], downsample_factor=1.0/7.0) # Output should be 3x224x224
        
        return scalenet, scalenet['scale']
Example #7
0
    def test_transform_border_modes(self):
        from lasagne.layers import InputLayer, TransformerLayer
        from lasagne.utils import floatX
        from theano.tensor import constant

        l_in = InputLayer((1, 1, 16, 16))
        l_loc = InputLayer((1, 6))

        # border_mode='nearest'
        layer = TransformerLayer(l_in, l_loc, border_mode='nearest')
        image = np.hstack((np.zeros((16, 8)), np.ones((16, 8))))
        inputs = floatX(image).reshape(l_in.shape)
        thetas = floatX(np.array([[4, 0, 0, 0, 1, 0]]))
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()

        np.testing.assert_allclose(inputs, outputs, rtol=1e-6)

        # border_mode='mirror'
        layer = TransformerLayer(l_in, l_loc, border_mode='mirror')
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()
        expected = np.zeros_like(outputs)
        expected[0, 0] = [.5, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, .5]

        np.testing.assert_allclose(expected, outputs, rtol=1e-6)

        # border_mode='wrap'
        layer = TransformerLayer(l_in, l_loc, border_mode='wrap')
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()
        expected = np.zeros_like(outputs)
        expected[0, 0] = [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0]

        np.testing.assert_allclose(expected, outputs, rtol=1e-6)

        with pytest.raises(ValueError):
            layer = TransformerLayer(l_in, l_loc, border_mode='invalid')
            outputs = layer.get_output_for([constant(inputs),
                                            constant(thetas)]).eval()
Example #8
0
def build_mitosis_encoder(input_shape, encoding_size=32, withst=False):
    # Parameters
    filter_size = (3, 3)
    num_filters = 32
    pool_size = (2, 2)
    # Localization Network

    l_input = InputLayer(shape=(None, input_shape[1], input_shape[2],
                                input_shape[3]))
    l_conv1 = Conv2DLayer(l_input,
                          num_filters=num_filters,
                          filter_size=filter_size)
    l_conv2 = Conv2DLayer(l_conv1,
                          num_filters=num_filters,
                          filter_size=filter_size)
    l_pool1 = MaxPool2DLayer(l_conv2, pool_size=pool_size)
    l_pipe1_layer = l_pool1  # We need this

    # ST Network
    if withst:
        # ST Params
        b = np.zeros((2, 3), dtype=theano.config.floatX)
        b[0, 0] = 1
        b[1, 1] = 1
        b = b.flatten()

        # ST Layers
        st_encode1 = DenseLayer(l_pool1,
                                num_units=50,
                                W=lasagne.init.HeUniform('relu'))
        st_encode2 = DenseLayer(st_encode1,
                                num_units=6,
                                b=b,
                                W=lasagne.init.Constant(0.0))
        l_trans1 = TransformerLayer(l_input, st_encode2, downsample_factor=1.0)

        # Localization Network

        st_conv1 = Conv2DLayer(l_trans1,
                               num_filters=num_filters,
                               filter_size=filter_size)
        st_covn2 = Conv2DLayer(st_conv1,
                               num_filters=num_filters,
                               filter_size=filter_size)
        st_pool1 = MaxPool2DLayer(st_covn2, pool_size=pool_size)
        l_pipe1_layer = st_pool1

    # Encoding Step
    l_reshape1 = ReshapeLayer(l_pipe1_layer, shape=([0], -1))
    l_encode = DenseLayer(l_reshape1,
                          num_units=encoding_size,
                          W=lasagne.init.HeUniform('relu'),
                          name='encoder')

    # Decoding Step
    l_decode = DenseLayer(l_encode,
                          W=l_encode.W.T,
                          num_units=l_reshape1.output_shape[1])
    l_reshape2 = ReshapeLayer(
        l_decode,
        shape=([0], num_filters,
               int(np.sqrt(l_reshape1.output_shape[1] / num_filters)),
               int(np.sqrt(l_reshape1.output_shape[1] / num_filters))))

    # Deconv Network
    l_unpool1 = Upscale2DLayer(l_reshape2, scale_factor=pool_size)
    l_deconv2 = TransposedConv2DLayer(l_unpool1,
                                      num_filters=l_conv2.input_shape[1],
                                      W=l_conv2.W,
                                      filter_size=l_conv2.filter_size,
                                      stride=l_conv2.stride,
                                      crop=l_conv2.pad,
                                      flip_filters=not l_conv2.flip_filters)

    l_deconv1 = TransposedConv2DLayer(l_deconv2,
                                      num_filters=l_conv1.input_shape[1],
                                      W=l_conv1.W,
                                      filter_size=l_conv1.filter_size,
                                      stride=l_conv1.stride,
                                      crop=l_conv1.pad,
                                      flip_filters=not l_conv1.flip_filters)

    return l_deconv1
Example #9
0
def build_st_network_MNIST(input_shape, mins, maxs, ranges, withdisc=True):
    # General Params
    num_filters = 64
    filter_size = (3, 3)
    pool_size = (2, 2)

    # SP Param
    b = np.zeros((2, 3), dtype=theano.config.floatX)
    b[0, 0] = 1
    b[1, 1] = 1
    b = b.flatten()  # identity transform

    # Localization Network
    l_in = InputLayer(shape=(None, input_shape[1], input_shape[2],
                             input_shape[3]))

    l_conv1 = Conv2DLayer(l_in,
                          num_filters=num_filters,
                          filter_size=filter_size)

    l_pool1 = MaxPool2DLayer(l_conv1, pool_size=pool_size)

    l_conv2 = Conv2DLayer(l_pool1,
                          num_filters=num_filters,
                          filter_size=filter_size)

    l_pool2 = MaxPool2DLayer(l_conv2, pool_size=pool_size)

    l_loc = DenseLayer(l_pool2, num_units=64, W=lasagne.init.HeUniform('relu'))

    l_param_reg = DenseLayer(l_loc,
                             num_units=6,
                             b=b,
                             nonlinearity=lasagne.nonlinearities.linear,
                             W=lasagne.init.Constant(0.0),
                             name='param_regressor')

    if withdisc:
        l_dis = DiscreteLayer(l_param_reg, mins, maxs, ranges)
    else:
        l_dis = l_param_reg

    # Transformer Network
    l_trans = TransformerLayer(l_in, l_dis, downsample_factor=1.0)

    # Classification Network
    network = lasagne.layers.Conv2DLayer(
        l_trans,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(
        network,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

    return network
Example #10
0
    def test_transform_border_modes(self):
        from lasagne.layers import InputLayer, TransformerLayer
        from lasagne.utils import floatX
        from theano.tensor import constant

        l_in = InputLayer((1, 1, 16, 16))
        l_loc = InputLayer((1, 6))

        # border_mode='nearest'
        layer = TransformerLayer(l_in, l_loc, border_mode='nearest')
        image = np.hstack((np.zeros((16, 8)), np.ones((16, 8))))
        inputs = floatX(image).reshape(l_in.shape)
        thetas = floatX(np.array([[4, 0, 0, 0, 1, 0]]))
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()

        np.testing.assert_allclose(inputs, outputs, rtol=1e-6)

        # border_mode='mirror'
        layer = TransformerLayer(l_in, l_loc, border_mode='mirror')
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()
        expected = np.zeros_like(outputs)
        expected[0, 0, :, (0, 1, 2, 3, 8, 9, 10, 11, 15)] = 1.

        np.testing.assert_allclose(expected, np.ceil(outputs), rtol=1e-6)

        # border_mode='wrap'
        layer = TransformerLayer(l_in, l_loc, border_mode='wrap')
        outputs = layer.get_output_for([constant(inputs),
                                        constant(thetas)]).eval()
        expected = np.zeros_like(outputs)
        expected[0, 0, :, (0, 1, 4, 5, 8, 9, 12, 13, 15)] = 1.

        np.testing.assert_allclose(expected, np.ceil(outputs), rtol=1e-6)
        with pytest.raises(ValueError):
            layer = TransformerLayer(l_in, l_loc, border_mode='invalid')
            outputs = layer.get_output_for(
                [constant(inputs), constant(thetas)]).eval()
Example #11
0
def buildNetwork(CFG, params, vocab):
    # {{{
    """
    TODO document me
    """

    # Use params to update CFG
    CFG = get_CFG(CFG, params)

    #-----------------------------------------------------------
    #           Setting up the image Embedding.
    #-----------------------------------------------------------
    l_input_sentence = InputLayer((CFG['BATCH_SIZE'], 1), name='l_input_sentence')  # input (1 word)
    l_sentence_embedding = lasagne.layers.EmbeddingLayer(l_input_sentence,
                                                         input_size=len(vocab),
                                                         output_size=CFG['EMBEDDING_SIZE'],
                                                         name='l_sentence_embedding')

    # Setting up CNN in case of fine tuning.
    if CFG['CNN_FINE_TUNE']:
        cnn, l_input_cnn, l_input_img = build_CNN(CFG)

        if CFG['CNN_MODEL'] == "vgg":
            vgg16, resnet50 = cnn, None
        elif CFG["CNN_MODEL"] == "resnet":
            vgg16, resnet50 = None, cnn

        if CFG['START_NORMALIZED'] == 1:
            l_input_cnn = ExpressionLayer(l_input_cnn, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
        elif CFG['START_NORMALIZED'] == 2:
            l_input_cnn = ExpressionLayer(l_input_cnn, lambda X: X / T.sqrt(
                T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
    else:
        l_input_cnn = InputLayer((CFG['BATCH_SIZE'], CFG['CNN_FEATURE_SIZE']), name='l_input_cnn')

    l_cnn_embedding = DenseLayer(l_input_cnn, num_units=CFG['EMBEDDING_SIZE'],
                                 nonlinearity=lasagne.nonlinearities.identity, name='l_cnn_embedding')

    l_cnn_embedding2 = ReshapeLayer(l_cnn_embedding, ([0], 1, [1]), name='l_cnn_embedding2')

    l_rnn_input = InputLayer((CFG['BATCH_SIZE'], 1, CFG['EMBEDDING_SIZE']), name='l_rnn_input')
    l_dropout_input = DropoutLayer(
        l_rnn_input, p=0.5, name='l_dropout_input')

    l_input_reg = None
    l_out_reg = None
    l_decoder = None
    l_region_feedback = None
    l_region = None
    l_input_img2 = None
    l_boxes = None
    l_conv = None
    l_loc = None
    l_loc1 = None
    l_input_loc = None
    l_sel_region2 = None
    l_weighted_region_prev = None
    l_weighted_region = None

    input_shape = (CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE'])
    if CFG['MODE'] == 'normal':
        # {{{1
        l_cell_input = InputLayer(input_shape, name='l_cell_input')
        l_prev_gru = InputLayer(input_shape, name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'],
                               l_cell_input, l_prev_gru, name='l_gru')

        l_dropout_output = DropoutLayer(
            l_gru, p=0.5, name='l_dropout_output')
        # decoder is a fully connected layer with one output unit for each word in the vocabulary
        l_decoder = DenseLayer(l_dropout_output, num_units=len(
            vocab), nonlinearity=lasagne.nonlinearities.softmax, name='l_decoder')

        l_out = ReshapeLayer(
            l_decoder, ([0], 1, [1]), name='l_out')
        # }}}

    elif CFG['MODE'] == 'tensor':
        l_cell_input = InputLayer(input_shape, name='l_cell_input')
        l_prev_gru = InputLayer(input_shape, name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'], l_cell_input, l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=0.5, name='l_dropout_output')
        l_dropout_output = ReshapeLayer(l_dropout_output, ([0], 1, [1]), name='l_dropout_output')

            # TODO put me back

        if CFG['CNN_FINE_TUNE']:
            l_input_regions, _input_regions, l_out_reg, l_input_img2, l_boxes, l_conv = build_finetune_proposals(CFG, vgg16, resnet50)
        else:
            l_input_regions = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions')

        # TODO a block.
        #l_decoder = build_decoderLayer(l_dropout_output, l_input_regions, vocab, CFG)
        if CFG.has_key('DISSECT') and CFG['DISSECT'] != 'No':
            if CFG['DISSECT'] == 'wr':
                l_decoder = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_hr='skip', b_hr='skip')
            elif CFG['DISSECT'] == 'rs':
                l_decoder = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_rw='skip', b_rw='skip')
        if CFG['DISSECT'] == 'wr':
            l_decoder = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                            dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_hr='skip', b_hr='skip')
        elif CFG['DISSECT'] == 'rs':
            l_decoder = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                            dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_rw='skip', b_rw='skip')
        else:
            l_decoder = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                            dim_w=len(vocab), nonlinearity=softmax, name='l_tensor')

        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(2), output_shape='auto', name='l_out')  # sum over regions

    elif CFG['MODE'] == 'transformer':
        #{{{2
        print(bcolors.OKGREEN + "Transformer mode." + bcolors.ENDC)
        from TProd3 import TensorProdFactLayer, WeightedSumLayer, SubsampleLayer

        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name='l_cell_input')
        from agentnet.memory import GRUMemoryLayer
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name="l_prev_gru")

        if CFG['TRANS_FEEDBACK']:
            l_weighted_region_prev = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE']), name="l_weighted_region_prev")
            if CFG['FEEDBACK'] == 2:
                l_cell_concat = lasagne.layers.ConcatLayer(
                    [l_cell_input, l_weighted_region_prev], axis=1, name='l_cell_concat')
            else:
                print("Are you sure you don't want to use feedback=2? I think you should. Change your mind, then come to see me again.")
        else:
            l_cell_concat = l_cell_input

        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'],
                               l_cell_concat, l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(l_gru, p=CFG['RNN_DROPOUT'], name='l_dropout_output')
        l_dropout_output = ReshapeLayer(l_dropout_output, ([0], 1, [1]), name='l_dropout_output')

        if CFG['TRANS_USE_PRETRAINED']:
            l_out_reg = vgg16['conv5_2']
            #l_out_reg2 = vgg16['conv5_3']
        else:
            l_out_reg = vgg16['conv5_3']
        l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 14, 14), name='l_input_reg')
        l_input_regions = l_input_reg
        if CFG['TRANS_USE_PRETRAINED']:
            l_input_regions = l_input_regions
        else:
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(l_input_regions, lambda X: X * 0.01, output_shape='auto')

        factor = 2.0
        W = lasagne.init.Constant(0.0)
        b = lasagne.init.Constant(0.0)
        if CFG['TRANS_MULTIPLE_BOXES']:
            num_prop, l_loc = build_loc_net(CFG, l_gru, l_input_regions, 1, (
                14, 14), (3, 3), CFG['TRANS_STRIDE'], CFG['TRANS_ZOOM'], W, b, name='')
            if CFG['TRANS_ADD_BIG_PROPOSALS']:
                num_prop_big, l_loc_big = build_loc_net(CFG, l_gru, l_input_regions, 1, (
                    14, 14), (3, 3), CFG['TRANS_STRIDE'], CFG['TRANS_ZOOM'] * 2, W, b, name='_big')
                l_loc = ConcatLayer((l_loc, l_loc_big), axis=0)
                num_prop += num_prop_big
            l_sel_region2 = MultiTransformerLayer(l_input_regions, l_loc, kernel_size=(
                3, 3), zero_padding=CFG['TRANS_ZEROPAD'])  # 3x3
            if CFG['TRANS_USE_PRETRAINED']:
                Wvgg = vgg16['conv5_3'].W.reshape(
                    (CFG['REGION_SIZE'], CFG['REGION_SIZE'] * 3 * 3)).swapaxes(0, 1)
                bvgg = vgg16['conv5_3'].b
                l_sel_region = DenseLayer(
                    l_sel_region2, num_units=CFG['REGION_SIZE'], name='l_sel_region', W=Wvgg, b=bvgg)
                if CFG['CONV_NORMALIZED'] == 1:
                    l_sel_region = ExpressionLayer(l_sel_region, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
                elif CFG['CONV_NORMALIZED'] == 2:
                    l_sel_region = ExpressionLayer(l_sel_region, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
                else:
                    l_sel_region = l_sel_region
            else:
                l_sel_region = DenseLayer(
                    l_sel_region, num_units=CFG['REGION_SIZE'], name='l_sel_region')

            l_sel_region = ReshapeLayer(
                l_sel_region, (CFG['BATCH_SIZE'], num_prop, CFG['REGION_SIZE']))
            l_sel_region = DimshuffleLayer(l_sel_region, (0, 2, 1))
            l_sel_region = ReshapeLayer(
                l_sel_region, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], num_prop))
        else:
            b = np.zeros((2, 3), dtype='float32')
            b[0, 0] = 2
            b[1, 1] = 2
            b = b.flatten()
            W = lasagne.init.Constant(0.0)
            l_input_loc = l_gru
            if CFG['TRANS_USE_STATE']:
                l_input_im = ConvLayer(l_input_regions, num_filters=512, filter_size=(
                    3, 3), pad='same', name='l_reduce_im1')
                l_input_im = lasagne.layers.MaxPool2DLayer(l_input_im, (2, 2))
                l_input_im = ConvLayer(l_input_im, num_filters=512, filter_size=(
                    3, 3), pad='same', name='l_reduce_im2')
                l_input_im = lasagne.layers.MaxPool2DLayer(l_input_im, (2, 2))
                l_input_im = ReshapeLayer(l_input_im, (CFG['BATCH_SIZE'], 512))
                l_input_loc = ConcatLayer((l_gru, l_input_im))
            l_loc1 = DenseLayer(
                l_input_loc, num_units=256, name='l_loc1')
            l_loc = DenseLayer(
                l_loc1, num_units=6, W=W, b=b, nonlinearity=None, name='l_loc2')
            l_sel_region = TransformerLayer(
                l_input_regions, l_loc, downsample_factor=2)
            l_sel_region = DenseLayer(
                l_sel_region, num_units=CFG['REGION_SIZE'], name='l_sel_region')
            l_sel_region = ReshapeLayer(
                l_sel_region, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 1))

        l_decoder = TensorProdFactLayer((l_dropout_output, l_sel_region), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'], dim_w=len(
            vocab), W=lasagne.init.Normal(std=0.001, mean=0.0), nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor')

        if CFG['TRANS_FEEDBACK']:
            l_region = ExpressionLayer(l_decoder, lambda X: X.sum(3), output_shape='auto', name='l_region')  # sum over regions
            l_weighted_region = WeightedSumLayer([l_sel_region, l_region], name='l_weighted_region')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(
            2), output_shape='auto', name='l_out')  # sum over regions
        #}}}

    elif CFG['MODE'] == 'tensor-feedback':
        # {{{2
        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name='l_cell_input')
        l_region_feedback = InputLayer((CFG['BATCH_SIZE'], CFG['NUM_REGIONS']), name='l_region_feedback')
        l_cell_concat = lasagne.layers.ConcatLayer(
            [l_cell_input, l_region_feedback], axis=1, name='l_cell_concat')
        from agentnet.memory import GRUMemoryLayer
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name="l_prev_gru")
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'],
                               l_cell_concat, l_prev_gru, name='l_gru')

        l_dropout_output = DropoutLayer(
            l_gru, p=0.5, name='l_dropout_output')
        l_dropout_output = ReshapeLayer(
            l_dropout_output, ([0], 1, [1]), name='l_dropout_output')

        from TProd3 import TensorProdFactLayer
        l_input_regions = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions')
        l_tensor = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'], dim_w=len(
            vocab), nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor')
        l_region = ExpressionLayer(l_tensor, lambda X: X.sum(
            3), output_shape='auto', name='l_region')  # sum over
        l_region = ReshapeLayer(
            l_region, ([0], [2]), name='l_region')

        l_out = ExpressionLayer(l_tensor, lambda X: X.sum(
            2), output_shape='auto', name='l_out')  # sum over regions
        #}}}

    elif CFG['MODE'] == 'tensor-feedback2':
        # {{{2
        l_feedback = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name='l_feedback')
        l_prev_gru = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name="l_prev_gru")
        from TProd3 import TensorProdFactLayer, WeightedSumLayer
        if CFG['PROPOSALS'] == 3:  # use images at different resolution but without fully connected layers
            import CNN
            vgg16_det = CNN.build_model_RCNN(
                CFG['NUM_REGIONS'], CFG['IM_SIZE'] * 1.5, pool_dims=3, dropout_value=CFG['RNN_DROPOUT'])
            print "Loading pretrained VGG16 parameters for detection"
            l_input_img2 = vgg16_det['input']
            l_conv = vgg16_det['conv5_3']
            l_boxes = vgg16_det['boxes']
            l_input_regions = vgg16_det['reshape']
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            l_cnn_embedding2 = DenseLayer(
                l_input_regions, num_units=CFG['REGION_SIZE'], name='l_cnn_proposals')
            l_input_regions = ReshapeLayer(
                l_cnn_embedding2, (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1))
            l_input_regions = lasagne.layers.DimshuffleLayer(
                l_input_regions, (0, 2, 1, 3))
            l_out_reg = l_input_regions
            l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1), name='l_input_reg')
            l_input_regions = ReshapeLayer(
                l_input_reg, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions')

        # use images at different resolution but without fully connected layers
        elif CFG['PROPOSALS'] == 4:
            if CFG['CNN_MODEL'] == 'vgg':
                import CNN
                vgg16_det = CNN.build_model_RCNN(CFG['NUM_REGIONS'], int(
                    CFG['IM_SIZE'] * 1.5), pool_dims=1, dropout_value=CFG['RNN_DROPOUT'])
                print "Loading pretrained VGG16 parameters for detection"
                model_param_values = pickle.load(open('vgg16.pkl'))[
                    'param values']
                lasagne.layers.set_all_param_values(
                    vgg16_det['conv5_3'], model_param_values[:-6])
                l_input_img2 = vgg16_det['input']
                l_conv = vgg16_det['conv5_3']
                l_boxes = vgg16_det['boxes']
                l_input_regions = vgg16_det['crop']
                l_input_regions = ReshapeLayer(
                    l_input_regions, (CFG['BATCH_SIZE'] * CFG['NUM_REGIONS'], CFG['REGION_SIZE']))
            else:
                resnet50_det = resnet_CNN.build_model_RCNN(
                    CFG['NUM_REGIONS'], im_size=CFG['IM_SIZE'] * 1.5, pool_dims=1, dropout_value=CFG['RNN_DROPOUT'])
                print "Loading pretrained resnet50 parameters for detection"
                # You can use this format to store other things for best effort
                model_param_values = pickle.load(open('resnet50.pkl'))[
                    'param values']
                from save_layers import add_names_layers_and_params
                add_names_layers_and_params(resnet50_det)
                #lasagne.layers.set_all_param_values(resnet50['prob'], model_param_values)
                set_param_dict(
                    resnet50_det['pool5'], model_param_values, prefix='', show_layers=False, relax=False)
                l_input_img2 = resnet50_det['input']
                l_conv = resnet50_det['res4f_relu']
                l_boxes = resnet50_det['boxes']
                l_input_regions = resnet50_det['crop']
                l_input_regions = ReshapeLayer(
                    l_input_regions, (CFG['BATCH_SIZE'] * CFG['NUM_REGIONS'], CFG['REGION_SIZE']))

            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            else:
                _input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            l_input_regions = ReshapeLayer(
                l_input_regions, (CFG['BATCH_SIZE'], CFG['NUM_REGIONS'], CFG['REGION_SIZE'], 1))
            l_input_regions = lasagne.layers.DimshuffleLayer(
                l_input_regions, (0, 2, 1, 3))
            l_out_reg = l_input_regions
            l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS'], 1), name='l_input_reg')
            l_input_regions = ReshapeLayer(
                l_input_reg, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regions')
        else:
            if CFG['CNN_MODEL'] == 'vgg':
                l_out_reg = vgg16['conv5_3']
            elif CFG['CNN_MODEL'] == 'resnet':
                l_out_reg = resnet50['res4f_relu']
            else:
                print(bcolors.FAIL + "Unrecognized network" + bcolors.ENDC)

            l_input_reg = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], 14, 14), name='l_input_reg')
            if CFG['CONV_REDUCED'] > 1:
                # added a scaling factor of 100 to avoid exploding gradients
                l_input_regions = ExpressionLayer(
                    l_input_reg, lambda X: X[:, :, ::CFG['CONV_REDUCED'], ::CFG['CONV_REDUCED']], output_shape='auto')
            else:
                l_input_regions = l_input_reg
            if CFG['CONV_NORMALIZED'] == 1:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / (T.sum(X, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            elif CFG['CONV_NORMALIZED'] == 2:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X / T.sqrt(T.sum(X**2, axis=1, keepdims=True) + 1e-8), output_shape='auto')
            else:
                l_input_regions = ExpressionLayer(
                    l_input_regions, lambda X: X * 0.01, output_shape='auto')
            if CFG['TENSOR_ADD_CONV']:
                l_input_regions = ConvLayer(l_input_regions, num_filters=CFG['REGION_SIZE'], filter_size=(
                    3, 3), pad='same', name='l_add_con')
            l_input_regions = ReshapeLayer(
                l_input_regions, (CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']))

        if CFG['TENSOR_TIED']:
            l_region_feedback = InputLayer((CFG['BATCH_SIZE'], CFG['NUM_REGIONS']), name='l_region_feedback')
            l_region_feedback2 = ReshapeLayer(
                l_region_feedback, ([0], 1, [1]), name='l_region_feedback2')
        else:
            l_shp2 = ReshapeLayer(
                l_prev_gru, (CFG['BATCH_SIZE'], 1, CFG['EMBEDDING_SIZE']))
            l_shp2 = DropoutLayer(
                l_shp2, p=CFG['RNN_DROPOUT'], name='l_shp2')
            l_tensor2 = TensorProdFactLayer((l_shp2, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'], dim_w=len(
                vocab), nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor2')
            l_region_feedback = ExpressionLayer(l_tensor2, lambda X: T.sum(
                X, 3), output_shape='auto', name='l_region')  # sum over
            l_region_feedback2 = ReshapeLayer(
                l_region_feedback, (CFG['BATCH_SIZE'], 1, CFG['NUM_REGIONS']))

        l_weighted_region = WeightedSumLayer(
            [l_input_regions, l_region_feedback2], name='l_weighted_region')
        # define a cell
        l_cell_input = InputLayer((CFG['BATCH_SIZE'], CFG['EMBEDDING_SIZE']), name='l_cell_input')
        if CFG['FEEDBACK'] == 0:  # none
            l_cell_concat = l_cell_input
        elif CFG['FEEDBACK'] == 1:  # none
            l_region2 = ReshapeLayer(
                l_region_feedback2, ([0], [2]))
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_region2], axis=1, name='l_cell_concat')
        elif CFG['FEEDBACK'] == 2:
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_weighted_region], axis=1, name='l_cell_concat')
        elif CFG['FEEDBACK'] == 3:
            l_region2 = ReshapeLayer(
                l_region_feedback2, ([0], [2]))
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_weighted_region, l_region2], axis=1, name='l_cell_concat')
        elif CFG['FEEDBACK'] == 4:
            # See RNNTraining.py for comments on this.
            from TProd3 import WeightedImageLayer
            l_weighted_image = WeightedImageLayer(
                [l_input_regions, l_region_feedback2], name='l_weighted_image')
            if CFG['IMGFEEDBACK_MECHANISM'] == 'highres':
                l_weighted_image_reshaped = ReshapeLayer(
                    l_weighted_image, ([0], [1], 14, 14), name='l_weighted_image_reshaped')
                l_weighted_image_conv_reduced = lasagne.layers.MaxPool2DLayer(
                    l_weighted_image_reshaped, (2, 2), name='l_weighted_image_conv_reduced')
                l_feedback_co1 = lasagne.layers.Conv2DLayer(
                    incoming=l_weighted_image_conv_reduced, num_filters=512, filter_size=(3, 3), pad='same', name='l_feedback_co1')
            else:
                l_weighted_image_reshaped = ReshapeLayer(
                    l_weighted_image, ([0], [1], 7, 7), name='l_weighted_image_reshaped')
                l_feedback_co1 = lasagne.layers.Conv2DLayer(
                    incoming=l_weighted_image_reshaped, num_filters=512, filter_size=(3, 3), pad='same', name='l_feedback_co1')

            l_feedback_po1 = lasagne.layers.MaxPool2DLayer(
                l_feedback_co1, (2, 2), name='l_feedback_po1')
            l_feedback_co2 = lasagne.layers.Conv2DLayer(
                incoming=l_feedback_po1, num_filters=512, filter_size=(3, 3), pad='same', name='l_feedback_co2')
            l_feedback_po2 = lasagne.layers.MaxPool2DLayer(
                l_feedback_co2, (2, 2), name='l_feedback_po2')
            l_feedback_po2_reshaped = ReshapeLayer(
                l_feedback_po2, ([0], [1]), name='l_feedback_po2_reshaped')
            l_cell_concat = lasagne.layers.ConcatLayer(
                [l_cell_input, l_feedback_po2_reshaped], axis=1, name='l_cell_concat')

        from agentnet.memory import GRUMemoryLayer
        l_gru = GRUMemoryLayer(CFG['EMBEDDING_SIZE'],
                               l_cell_concat, l_prev_gru, name='l_gru')
        l_dropout_output = DropoutLayer(
            l_gru, p=0.5, name='l_dropout_output')
        l_shp1 = ReshapeLayer(
            l_dropout_output, ([0], 1, [1]), name='l_shp1')

        if CFG.has_key('DISSECT') and CFG['DISSECT'] != 'No':
            import pdb
            pdb.set_trace()  # XXX BREAKPOINT
            if CFG['DISSECT'] == 'wr':
                l_decoder = TensorProdFactLayer((l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_hr='skip', b_hr='skip')
            elif CFG['DISSECT'] == 'rs':
                import pdb
                pdb.set_trace()  # XXX BREAKPOINT
                l_decoder = TensorProdFactLayer((l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor', W_rw='skip', b_rw='skip')
        else:
            if CFG.has_key('DENSITY_TEMPERING') and CFG['DENSITY_TEMPERING']:
                print("TEMPERING")
                l_gamma = DenseLayer(
                    l_shp1, num_units=1, name='l_gamma')
                l_gamma_shp = ReshapeLayer(
                    l_gamma, ([0], [1], 1, 1))
                from TProd3 import TensorTemperatureLayer
                l_decoder = TensorTemperatureLayer((l_shp1, l_input_regions, l_gamma_shp), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'], dim_w=len(
                    vocab), nonlinearity=lasagne.nonlinearities.softmax, name='l_tensor')

            else:
                l_decoder = TensorProdFactLayer((l_shp1, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'],
                                                dim_w=len(vocab), nonlinearity=softmax, name='l_tensor')

        if CFG['TENSOR_COND_WORD']:
            from RNNTraining import get_Regions_cond_words
            l_region = ExpressionLayer(
                l_decoder, get_Regions_cond_words, output_shape='auto', name='l_region')
        else:
            l_region = ExpressionLayer(l_decoder, lambda X: X.sum(
                3), output_shape='auto', name='l_region')  # sum over
        l_region = ReshapeLayer(
            l_region, ([0], [2]), name='l_region')
        l_out = ExpressionLayer(l_decoder, lambda X: X.sum(
            2), output_shape='auto', name='l_out')  # sum over regions
        #}}}

    elif CFG['MODE'] == 'tensor-reducedw':
        # {{{2
        from TProd3 import TensorProdFactLayer
        # input:  [h(batch,dimh),r(num_batch,r_dim,num_r)]
        # output: [ h[0],r[2], dim_w ]
        l_input_regions = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regins')

        if CFG.has_key('TENSOR_RECTIFY') and CFG['TENSOR_RECTIFY']:
            l_tensor = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG[
                                           'REGION_SIZE'], dim_w=CFG['EMBEDDING_WORDS'], nonlinearity=lasagne.nonlinearities.rectify, name='l_tensor')
        else:
            l_tensor = TensorProdFactLayer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG[
                                           'REGION_SIZE'], dim_w=CFG['EMBEDDING_WORDS'], nonlinearity=lasagne.nonlinearities.identity, name='l_tensor')
        # softmax does not accept non-flat layers, then flatten->softmax->reshape
        l_flatten = ReshapeLayer(
            l_decoder, (CFG['BATCH_SIZE'] * 1 * CFG['NUM_REGIONS'], CFG['EMBEDDING_WORDS']), name='l_flatten')
        l_words = DenseLayer(l_flatten, num_units=len(vocab),
                                            nonlinearity=lasagne.nonlinearities.identity, name='l_words')

        l_reshape = ReshapeLayer(
            l_words, (CFG['BATCH_SIZE'] * 1, CFG['NUM_REGIONS'] * len(vocab)), name='l_reshape')

        l_softmax = lasagne.layers.NonlinearityLayer(
            l_reshape, nonlinearity=lasagne.nonlinearities.softmax, name='l_softmax')

        l_reshape1 = ReshapeLayer(
            l_softmax, (CFG['BATCH_SIZE'], 1, CFG['NUM_REGIONS'], len(vocab)), name='l_reshape1')

        l_out = ExpressionLayer(l_reshape1, lambda X: X.sum(
            2), output_shape='auto', name='l_out')  # sum over regions
        # }}}

    elif CFG['MODE'] == 'tensor-removedWrw':
        from TProd3 import TensorProdFact2Layer
        # input:  [h(batch,dimh),r(num_batch,r_dim,num_r)]
        # output: [ h[0],r[2], dim_w ]
        l_input_regions = InputLayer((CFG['BATCH_SIZE'], CFG['REGION_SIZE'], CFG['NUM_REGIONS']), name='l_input_regins')

        l_decoder = TensorProdFact2Layer((l_dropout_output, l_input_regions), dim_h=CFG['EMBEDDING_SIZE'], dim_r=CFG['REGION_SIZE'], dim_w=len(
            vocab), nonlinearity=lasagne.nonlinearities.softmax, name='l_decoder')

        l_out = ExpressionLayer(l_decoder, lambda X: X.sum( 2), output_shape='auto', name='l_out')  # sum over regions

    net_dictionnary = {'loc1': l_loc1, 'input_loc': l_input_loc, 'sel_region2': l_sel_region2,
                       'loc': l_loc, 'conv': l_conv, 'prev': l_prev_gru, 'input': l_cell_input,
                       'gru': l_gru, 'sent': l_input_sentence, 'img': l_input_img, 'img2': l_input_img2,
                       'reg_feedback2': l_region_feedback, 'reg_feedback': l_region, 'reg': l_input_reg,
                       'out_reg': l_out_reg, 'out': l_out, 'cnn': l_cnn_embedding, 'sent_emb': l_sentence_embedding,
                       'decoder': l_decoder, 'boxes': l_boxes, 'weighted_regions_prev': l_weighted_region_prev,
                       'weighted_regions': l_weighted_region}
    return net_dictionnary