def build_model(text_len, negative_size, optimizer, word_size, entity_size,
                dim_size, word_static, entity_static, word_embedding,
                entity_embedding):
    text_input_layer = Input(shape=(text_len,), dtype='int32')
    word_embed_layer = Embedding(word_size, dim_size, input_length=text_len,
                                 name='word_embedding', weights=[word_embedding],
                                 trainable=not word_static)(text_input_layer)
    text_layer = TextRepresentationLayer(name='text_layer')(
        [word_embed_layer, text_input_layer])

    entity_input_layer = Input(shape=(negative_size + 1,), dtype='int32')
    entity_embed_layer = Embedding(entity_size, dim_size,
                                   input_length=negative_size + 1,
                                   name='entity_embedding',
                                   weights=[entity_embedding],
                                   trainable=not entity_static)(entity_input_layer)

    similarity_layer = DotLayer(name='dot_layer')(
        [RepeatVector(negative_size + 1)(text_layer), entity_embed_layer])
    predictions = SoftmaxLayer()(similarity_layer)

    model = Model(input=[text_input_layer, entity_input_layer], output=predictions)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
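A hypothetical call sketch for build_model, with made-up vocabulary sizes and randomly initialised embedding matrices purely for illustration (in practice the embeddings would come from pretrained word/entity vectors):

import numpy as np

# Hypothetical sizes and randomly initialised embeddings, for illustration only.
word_emb = np.random.uniform(-0.05, 0.05, (10000, 300)).astype('float32')
entity_emb = np.random.uniform(-0.05, 0.05, (50000, 300)).astype('float32')

model = build_model(text_len=100, negative_size=29, optimizer='adagrad',
                    word_size=10000, entity_size=50000, dim_size=300,
                    word_static=True, entity_static=False,
                    word_embedding=word_emb, entity_embedding=entity_emb)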
def __init__(self, inshape, hidshape, noutputs):
    ninput = np.prod(inshape)
    nhid = np.prod(hidshape)
    nparams = (ninput + 1) * nhid + (nhid * noutputs)
    # TODO:
    self.params = np.empty(nparams)
    self._grad = np.empty(nparams)
    inhidwts = ninput * nhid
    hidoutwts = nhid * noutputs
    self.layers = [
        LinearLayer(inshape, hidshape,
                    params=self.params[0:inhidwts],
                    grad=self._grad[0:inhidwts]),
        LogisticLayer(hidshape,
                      params=self.params[inhidwts:(inhidwts + nhid)],
                      grad=self._grad[inhidwts:(inhidwts + nhid)]),
        LinearLayer(hidshape, noutputs,
                    params=self.params[(inhidwts + nhid):],
                    grad=self._grad[(inhidwts + nhid):]),
        SoftmaxLayer()
    ]
def test_softmax_layer():
    x_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [7.0, 3.2, 4.7, 1.4],
                        [6.4, 3.2, 4.5, 1.5],
                        [6.3, 3.3, 6.0, 2.5],
                        [5.8, 2.7, 5.1, 1.9]])
    y_train = np.array([0, 0, 1, 1, 2, 2])
    softmax = SoftmaxLayer()
    W = 0.001 * np.random.randn(4, 3)
    b = np.zeros((1, 3))
    reg_parameter = 0.001
    g_numerical_W = eval_numerical_gradient(softmax.forward_pass, x_train, y_train,
                                            W, b, reg_parameter)
    g_analytical_W = eval_analytical_gradient(softmax, x_train, y_train,
                                              W, b, reg_parameter)
    assert check_gradient(g_numerical_W, g_analytical_W) <= 1e-7, \
        "Error in calculating gradient of the SoftmaxLayer"
def test_tanh_layer():
    x_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [7.0, 3.2, 4.7, 1.4],
                        [6.4, 3.2, 4.5, 1.5],
                        [6.3, 3.3, 6.0, 2.5],
                        [5.8, 2.7, 5.1, 1.9]])
    y_train = np.array([0, 0, 1, 1, 2, 2])
    W1 = np.random.randn(4, 10) * 0.001
    b1 = np.zeros((1, 10))
    W2 = np.random.randn(10, 6) * 0.001
    b2 = np.zeros((1, 6))
    softmax = SoftmaxLayer()
    tanh = TanhLayer()
    reg_parameter = 0.001
    g_numerical_W = eval_hidden_numerical_gradient(tanh, softmax, x_train, y_train,
                                                   W1, b1, W2, b2, reg_parameter)
    g_analytical_W = eval_hidden_analytical_gradient(tanh, softmax, x_train, y_train,
                                                     W1, b1, W2, b2, reg_parameter)
    assert check_gradient(g_numerical_W, g_analytical_W) <= 1e-7, \
        "Error in calculating gradient of the TanhLayer"
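Both tests compare a numerical and an analytical gradient through a check_gradient helper and assert the result is below 1e-7. A helper of this kind usually returns the maximum elementwise relative error between the two gradients; a minimal sketch under that assumption (the project's actual helper may differ):

import numpy as np

def check_gradient(g_numerical, g_analytical, eps=1e-12):
    # Maximum relative error between the two gradient estimates.
    num = np.abs(g_numerical - g_analytical)
    den = np.maximum(np.abs(g_numerical) + np.abs(g_analytical), eps)
    return np.max(num / den)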
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.ivector('y')
    rand = T.fvector('rand')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    if flag_datalayer:
        data_layer = DataLayer(input=x, image_shape=(3, 256, 256, batch_size),
                               cropsize=227, rand=rand, mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                    image_shape=(3, 227, 227, batch_size),
                                    filter_shape=(3, 11, 11, 96),
                                    convstride=4, padsize=0, group=1,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=True,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                    image_shape=(96, 27, 27, batch_size),
                                    filter_shape=(96, 5, 5, 256),
                                    convstride=1, padsize=2, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.1, lrn=True,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                    image_shape=(256, 13, 13, batch_size),
                                    filter_shape=(256, 3, 3, 384),
                                    convstride=1, padsize=1, group=1,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 384),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.1, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 256),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, input_shape=(None, 3, None, None), n_classes=6,
             n_filters_first_conv=48, n_pool=4, growth_rate=12,
             n_layers_per_block=5, dropout_p=0.5):
    """
    This code implements the Fully Convolutional DenseNet described in
    https://arxiv.org/abs/1611.09326

    The network consists of a downsampling path, where dense blocks and
    transitions down are applied, followed by an upsampling path where
    transitions up and dense blocks are applied. Skip connections are used
    between the downsampling path and the upsampling path. Each layer is a
    composite function of BN - ReLU - Conv and the last layer is a softmax layer.

    :param input_shape: shape of the input batch. Only the first dimension (n_channels) is needed
    :param n_classes: number of classes
    :param n_filters_first_conv: number of filters for the first convolution applied
    :param n_pool: number of pooling layers = number of transitions down = number of transitions up
    :param growth_rate: number of new feature maps created by each layer in a dense block
    :param n_layers_per_block: number of layers per block. Can be an int or a list of size 2 * n_pool + 1
    :param dropout_p: dropout rate applied after each convolution (0. for not using)
    """

    if type(n_layers_per_block) == list:
        assert (len(n_layers_per_block) == 2 * n_pool + 1)
    elif type(n_layers_per_block) == int:
        n_layers_per_block = [n_layers_per_block] * (2 * n_pool + 1)
    else:
        raise ValueError

    # Theano variables
    self.input_var = T.tensor4('input_var', dtype='float32')  # input image
    self.target_var = T.tensor4('target_var', dtype='int32')  # target

    #####################
    # First Convolution #
    #####################

    inputs = InputLayer(input_shape, self.input_var)

    # We perform a first convolution. All the feature maps will be stored in
    # the tensor called stack (the Tiramisu)
    stack = Conv2DLayer(inputs, n_filters_first_conv, filter_size=3, pad='same',
                        W=HeUniform(gain='relu'), nonlinearity=linear,
                        flip_filters=False)
    # The number of feature maps in the stack is stored in the variable n_filters
    n_filters = n_filters_first_conv

    #####################
    # Downsampling path #
    #####################

    skip_connection_list = []

    for i in range(n_pool):
        # Dense Block
        for j in range(n_layers_per_block[i]):
            # Compute new feature maps
            l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            # And stack it: the Tiramisu is growing
            stack = ConcatLayer([stack, l])
            n_filters += growth_rate
        # At the end of the dense block, the current stack is stored in the
        # skip_connections list
        skip_connection_list.append(stack)

        # Transition Down
        stack = TransitionDown(stack, n_filters, dropout_p)

    skip_connection_list = skip_connection_list[::-1]

    #####################
    #    Bottleneck     #
    #####################

    # We now store the output of the next dense block in a list.
    # We will only upsample these new feature maps
    block_to_upsample = []

    # Dense Block
    for j in range(n_layers_per_block[n_pool]):
        l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
        block_to_upsample.append(l)
        stack = ConcatLayer([stack, l])

    #######################
    #   Upsampling path   #
    #######################

    for i in range(n_pool):
        # Transition Up (upsampling + concatenation with the skip connection)
        n_filters_keep = growth_rate * n_layers_per_block[n_pool + i]
        stack = TransitionUp(skip_connection_list[i], block_to_upsample,
                             n_filters_keep)

        # Dense Block
        block_to_upsample = []
        for j in range(n_layers_per_block[n_pool + i + 1]):
            l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            block_to_upsample.append(l)
            stack = ConcatLayer([stack, l])

    #####################
    #      Softmax      #
    #####################

    self.output_layer = SoftmaxLayer(stack, n_classes)
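For context, the BN_ReLU_Conv composite used throughout the constructor above is, per the docstring, a BN - ReLU - Conv (- Dropout) stack. A minimal Lasagne sketch under that assumption (the repository's own helper may differ in initialisation or dropout details):

from lasagne.layers import BatchNormLayer, NonlinearityLayer, Conv2DLayer, DropoutLayer
from lasagne.nonlinearities import rectify
from lasagne.init import HeUniform

def BN_ReLU_Conv(inputs, n_filters, filter_size=3, dropout_p=0.2):
    # Batch norm, then ReLU, then a 'same'-padded convolution, then optional dropout.
    l = NonlinearityLayer(BatchNormLayer(inputs), rectify)
    l = Conv2DLayer(l, n_filters, filter_size, pad='same',
                    W=HeUniform(gain='relu'), nonlinearity=None,
                    flip_filters=False)
    if dropout_p:
        l = DropoutLayer(l, dropout_p)
    return l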
import skimage.measure
import pickle

from readlabel import read_image
from network import Network
from layers import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer, ReLU, Sigmoid

whole_data = read_image(path1='test_images/', path2='./test_annotation',
                        data_size=1050)
whole_x = whole_data[0]
mean = whole_x.mean(axis=0)
std = whole_x.std(axis=0)
whole_x = (whole_x - mean) / std
whole_y = whole_data[1]

test_x = whole_x
test_y = whole_y
test_data = [test_x, test_y]

mini_batch_size = 1

# final
net = Network([ConvPoolLayer(filter_shape=(5, 5, 3, 9),
                             image_shape=(mini_batch_size, 64, 64, 3),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(5, 5, 9, 18),
                             image_shape=(mini_batch_size, 30, 30, 9),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(4, 4, 18, 36),
                             image_shape=(mini_batch_size, 13, 13, 18),
                             poolsize=2, activation_fn=ReLU),
               FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU),
               FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU),
               SoftmaxLayer(n_in=50, n_out=20, activation_fn=None)],
              mini_batch_size)

print('start')
net.load_test(mini_batch_size, test_data, path='./finalparams_noact.pickle')
# Fragment: tail of a prediction method, followed by module-level network setup
    i = T.lscalar()  # mini-batch index
    self.test_mb_predictions = theano.function(
        [i], self.layers[-1].y_out,
        givens={self.x: observation},
        on_unused_input='warn')
    return self.test_mb_predictions(0)

# Initialize network
layers = [
    FullyConnectedLayer(n_in=4, n_out=10),
    FullyConnectedLayer(n_in=10, n_out=10),
    SoftmaxLayer(n_in=10, n_out=2)
]
params = [param for layer in layers for param in layer.params]
iterations = mini_batch_size
x = T.vector("x")
y = T.ivector("y")
init_layer = layers[0]
init_layer.set_inpt(x, 1)
for j in xrange(1, len(layers)):
    prev_layer, layer = layers[j - 1], layers[j]
    layer.set_inpt(prev_layer.output, 1)
cost = T.argmax(T.log(layers[-1].output))
import numpy as np

from layers import SoftmaxLayer
from datareader import load_mnist
from constants import *

x_train, y_train = load_mnist(MNIST_TRAINING_X, MNIST_TRAINING_y)
x_train = x_train.reshape(MNIST_NUM_TRAINING, MNIST_NUM_FEATURES)
y_train = y_train.reshape(MNIST_NUM_TRAINING)

# initialize parameters randomly
W = 0.001 * np.random.randn(MNIST_NUM_FEATURES, MNIST_NUM_OUTPUT)
b = np.zeros((1, MNIST_NUM_OUTPUT))

learning_rate = 0.1   # step size of the gradient descent algorithm
reg_parameter = 0.01  # regularization strength

softmax = SoftmaxLayer()
num_iter = 1000
BATCH_SIZE = 500

for i in range(num_iter):
    # sample a mini-batch with replacement
    idx = np.random.choice(MNIST_NUM_TRAINING, BATCH_SIZE, replace=True)
    x_batch = x_train[idx, :]
    y_batch = y_train[idx]

    output_prob, loss = softmax.forward_pass(x_batch, y_batch, W, b,
                                             reg_parameter, [])
    if i % 50 == 0:
        print('iteration: {:3d} loss: {:3e}'.format(i, loss))

    gradW, gradB, _ = softmax.backward_pass(output_prob, x_batch, y_batch,
                                            W, b, reg_parameter)
    W = W - learning_rate * gradW
    b = b - learning_rate * gradB  # also apply the bias gradient returned above
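For reference, a forward_pass of the kind called in this loop typically computes class probabilities with a numerically stable softmax and returns them together with the regularized cross-entropy loss. A minimal sketch under that assumption (the project's SoftmaxLayer signature, including its trailing list argument, is not reproduced here):

import numpy as np

def softmax_forward(x, y, W, b, reg):
    # class scores and numerically stable softmax probabilities
    scores = x.dot(W) + b
    scores -= scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    # mean cross-entropy loss plus L2 regularization on W
    n = x.shape[0]
    loss = -np.log(probs[np.arange(n), y]).mean() + 0.5 * reg * np.sum(W * W)
    return probs, loss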
test_data = [test_x, test_y]

# final
net = Network([ConvPoolLayer(filter_shape=(5, 5, 3, 9),
                             image_shape=(mini_batch_size, 64, 64, 3),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(5, 5, 9, 18),
                             image_shape=(mini_batch_size, 30, 30, 9),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(4, 4, 18, 36),
                             image_shape=(mini_batch_size, 13, 13, 18),
                             poolsize=2, activation_fn=ReLU),
               FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU),
               FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU),
               SoftmaxLayer(n_in=50, n_out=20, activation_fn=None)],
              mini_batch_size)

print('start')
net.train_save(training_data, 13, mini_batch_size, 0.001, validation_data,
               test_data, test=False, save=2)
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    num_seq = config['num_seq']
    self.n_timesteps = config['num_timesteps']
    num_joints = config['num_joints']
    classes_num = config['classes_num']

    # ##################### BUILD NETWORK ##########################
    mask = T.fvector('mask')
    y = T.lvector('y')
    target = T.ftensor3('target')
    rand = T.fvector('rand')
    trng = RandomStreams(1234)
    use_noise = T.fscalar('use_noise')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    conv_fea = T.ftensor4('conv_fea')  # (49, 16, 8, 1024)

    lstm_att_layer15 = JointAttentionLstmLayer(config, num_joints,
                                               conv_fea=conv_fea, mask=mask,
                                               batch_size=batch_size,
                                               num_seq=num_seq, trng=trng,
                                               use_noise=use_noise,
                                               n_in=1024 * 5, n_out=1024,
                                               dim_part=32)
    self.layers.append(lstm_att_layer15)
    params += lstm_att_layer15.params
    weight_types += lstm_att_layer15.weight_type
    self.conv_fea = conv_fea

    softmax_input = lstm_att_layer15.output
    softmax_layer15 = SoftmaxLayer(input=softmax_input, n_in=1024, n_out=21)
    self.layers.append(softmax_layer15)
    params += softmax_layer15.params
    weight_types += softmax_layer15.weight_type

    # #################### NETWORK BUILT #######################

    self.cost_nll = softmax_layer15.negative_log_likelihood(y, mask)
    self.cost_jhmdb_attention = T.mean(
        T.sum(T.sum(0.5 * (lstm_att_layer15.attention - target) ** 2, axis=1),
              axis=1),
        axis=0, dtype=theano.config.floatX)
    self.cost = self.cost_nll + self.cost_jhmdb_attention
    self.errors_video = softmax_layer15.errors_video(y, mask, batch_size, num_seq)
    self.params = params
    self.prob = softmax_layer15.p_y_given_x
    self.mask = mask
    self.y = y
    self.target = target
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.num_seq = num_seq
    self.use_noise = use_noise
def __init__(self, config):
    self.config = config
    batch_size = config.batch_size
    lib_conv = config.lib_conv
    group = (2 if config.grouping else 1)
    LRN = (True if config.LRN else False)
    print 'LRN, group', LRN, group

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    x = T.ftensor4('x')
    y = T.lvector('y')

    print '... building the model with ConvLib %s, LRN %s, grouping %i ' \
        % (lib_conv, LRN, group)

    self.layers = []
    params = []
    weight_types = []

    layer1_input = x
    convpool_layer1 = ConvPoolLayer(
        input=layer1_input,
        image_shape=((3, 224, 224, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 3, 227, 227)),
        filter_shape=((3, 11, 11, 96) if lib_conv == 'cudaconvnet'
                      else (96, 3, 11, 11)),
        convstride=4,
        padsize=(0 if lib_conv == 'cudaconvnet' else 3),
        group=1, poolsize=3, poolstride=2,
        bias_init=0.0, lrn=LRN, lib_conv=lib_conv)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(
        input=convpool_layer1.output,
        image_shape=((96, 27, 27, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 96, 27, 27)),
        filter_shape=((96, 5, 5, 256) if lib_conv == 'cudaconvnet'
                      else (256, 96, 5, 5)),
        convstride=1, padsize=2, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.1, lrn=LRN, lib_conv=lib_conv)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(
        input=convpool_layer2.output,
        image_shape=((256, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 256, 13, 13)),
        filter_shape=((256, 3, 3, 384) if lib_conv == 'cudaconvnet'
                      else (384, 256, 3, 3)),
        convstride=1, padsize=1, group=1,
        poolsize=1, poolstride=0,
        bias_init=0.0, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(
        input=convpool_layer3.output,
        image_shape=((384, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 384, 13, 13)),
        filter_shape=((384, 3, 3, 384) if lib_conv == 'cudaconvnet'
                      else (384, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=1, poolstride=0,
        bias_init=0.1, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(
        input=convpool_layer4.output,
        image_shape=((384, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 384, 13, 13)),
        filter_shape=((384, 3, 3, 256) if lib_conv == 'cudaconvnet'
                      else (256, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    if lib_conv == 'cudaconvnet':
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    else:
        fc_layer6_input = convpool_layer5.output.flatten(2)

    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                  n_out=1000)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    # self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, input, n_in=28**2, n_hidden_1=1024, n_hidden_2=1024,
             n_hidden_3=1024, n_hidden_4=1024, n_out=10,
             W_hidden_1=None, W_hidden_2=None, W_hidden_3=None, W_hidden_4=None,
             W_out=None, dropout=0.0, seed=None):
    relu_activation = lambda x: T.nnet.relu(x, 0.1)
    # relu_activation = T.nnet.relu
    seed = np.random.randint(int(1e5)) if seed is None else seed

    self.dropout_layer_1 = DropoutLayer(input=input, seed=seed, dropout=dropout)
    self.hidden_1 = HiddenLayer(
        seed=seed + 1,
        # input=input,
        input=self.dropout_layer_1.output,
        # input=self.dropout_layer.output,
        n_in=n_in,
        n_out=n_hidden_1,
        activation=relu_activation,
        W=W_hidden_1,
    )

    self.dropout_layer_2 = DropoutLayer(input=self.hidden_1.output,
                                        seed=seed + 2, dropout=dropout)
    self.hidden_2 = HiddenLayer(
        seed=seed + 3,
        # input=self.hidden_1.output,
        input=self.dropout_layer_2.output,
        n_in=n_hidden_1,
        n_out=n_hidden_2,
        activation=relu_activation,
        W=W_hidden_2)

    self.dropout_layer_3 = DropoutLayer(input=self.hidden_2.output,
                                        seed=seed + 4, dropout=dropout)
    self.hidden_3 = HiddenLayer(seed=seed + 5,
                                input=self.dropout_layer_3.output,
                                n_in=n_hidden_2,
                                n_out=n_hidden_3,
                                activation=relu_activation,
                                W=W_hidden_3)

    self.dropout_layer_4 = DropoutLayer(input=self.hidden_3.output,
                                        seed=seed + 6, dropout=dropout)
    self.hidden_4 = HiddenLayer(seed=seed + 7,
                                input=self.dropout_layer_4.output,
                                n_in=n_hidden_3,
                                n_out=n_hidden_4,
                                activation=relu_activation,
                                W=W_hidden_4)

    self.dropout_layer_5 = DropoutLayer(input=self.hidden_4.output,
                                        seed=seed + 8, dropout=dropout)
    self.linear_layer = HiddenLayer(
        seed=seed + 9,
        # input=self.hidden_1.output,
        # input=self.hidden_2.output,
        input=self.dropout_layer_5.output,
        n_in=n_hidden_4,
        n_out=n_out,
        activation=identity_map,
        W=W_out)

    self.softmax_layer = SoftmaxLayer(input=self.linear_layer.output)

    # keep track of model input
    self.input = input

    self.p_y_given_x = self.softmax_layer.p_y_given_x
    self.y_pred = self.softmax_layer.y_pred

    self.L1 = (abs(self.hidden_1.W).sum() + abs(self.hidden_2.W).sum() +
               abs(self.hidden_3.W).sum() + abs(self.hidden_4.W).sum() +
               abs(self.linear_layer.W).sum())
    self.L2_sqr = (T.sum(self.hidden_1.W**2) + T.sum(self.hidden_2.W**2) +
                   T.sum(self.hidden_3.W**2) + T.sum(self.hidden_4.W**2) +
                   T.sum(self.linear_layer.W**2))

    self.mean_log_likelihood = self.softmax_layer.mean_log_likelihood
    self.errors = self.softmax_layer.errors
    self.params = (self.hidden_1.params + self.hidden_2.params +
                   self.hidden_3.params + self.hidden_4.params +
                   self.linear_layer.params)
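A training cost for this MLP would typically be assembled from the exposed mean_log_likelihood together with the L1 and L2_sqr terms. A hedged sketch, assuming mean_log_likelihood takes the integer label vector and using hypothetical regularisation strengths (the actual training script may weight the terms differently):

import theano.tensor as T

y = T.ivector('y')          # integer class labels
l1_reg, l2_reg = 0.0, 1e-4  # hypothetical regularisation strengths
# mlp is an instance of the class defined above
cost = -mlp.mean_log_likelihood(y) + l1_reg * mlp.L1 + l2_reg * mlp.L2_sqr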
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    layers = []
    params = []
    weight_types = []

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # rand1/rand2 are random arrays used for random cropping/mirroring of data
    x1 = T.ftensor4('x1')
    x2 = T.ftensor4('x2')
    y = T.lvector('y')  # The ground truth to be compared with will go here
    rand1 = T.fvector('rand1')
    rand2 = T.fvector('rand2')

    print '... building the model'

    if flag_datalayer:
        data_layerA = DataLayer(input=x1, image_shape=(3, 256, 256, batch_size),
                                cropsize=227, rand=rand1, mirror=True,
                                flag_rand=config['rand_crop'])
        layer1A_input = data_layerA.output
    else:
        layer1A_input = x1

    if flag_datalayer:
        data_layerB = DataLayer(input=x2, image_shape=(3, 256, 256, batch_size),
                                cropsize=227, rand=rand2, mirror=True,
                                flag_rand=config['rand_crop'])
        layer1B_input = data_layerB.output
    else:
        layer1B_input = x2

    fc_layer2_input = T.concatenate(
        (T.flatten(layer1A_input.dimshuffle(3, 0, 1, 2), 2),
         T.flatten(layer1B_input.dimshuffle(3, 0, 1, 2), 2)), axis=1)
    fc_layer2 = FCLayer(input=fc_layer2_input, n_in=154587 * 2, n_out=4096)
    layers.append(fc_layer2)
    params += fc_layer2.params
    weight_types += fc_layer2.weight_type

    dropout_layer2 = DropoutLayer(fc_layer2.output, n_in=4096, n_out=4096)

    fc_layer3 = FCLayer(input=dropout_layer2.output, n_in=4096, n_out=4096)
    layers.append(fc_layer3)
    params += fc_layer3.params
    weight_types += fc_layer3.weight_type

    dropout_layer3 = DropoutLayer(fc_layer3.output, n_in=4096, n_out=4096)

    # Final softmax layer
    softmax_layer3 = SoftmaxLayer(input=dropout_layer3.output, n_in=4096,
                                  n_out=2)  # Only a single binary output is required!
    layers.append(softmax_layer3)
    params += softmax_layer3.params
    weight_types += softmax_layer3.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer3.negative_log_likelihood(y)
    self.errors = softmax_layer3.errors(y)
    self.errors_top_5 = softmax_layer3.errors_top_x(y, 5)
    self.x1 = x1
    self.x2 = x2
    self.y = y
    self.rand1 = rand1
    self.rand2 = rand2
    self.layers = layers
    self.params = params
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, config, testMode):
    self.config = config
    batch_size = config['batch_size']
    lib_conv = config['lib_conv']
    useLayers = config['useLayers']
    #imgWidth = config['imgWidth']
    #imgHeight = config['imgHeight']
    # if we wish to initialize alexnet with some weights; layers.py must be
    # changed to accept initializing weights
    initWeights = config['initWeights']
    if initWeights:
        weightsDir = config['weightsDir']
        weightFileTag = config['weightFileTag']
    prob_drop = config['prob_drop']

    # ##################### BUILD NETWORK ##########################
    x = T.ftensor4('x')
    mean = T.ftensor4('mean')
    #y = T.lvector('y')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    if useLayers >= 1:
        convpool_layer1 = ConvPoolLayer(input=x - mean,
                                        image_shape=(3, None, None, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W_0' + weightFileTag,
                                                     'b_0' + weightFileTag])
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

    if useLayers >= 2:
        # change from 27 to the appropriate value based on conv1's output
        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, None, None, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1, padsize=2, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_1' + weightFileTag,
                                                     'W1_1' + weightFileTag,
                                                     'b0_1' + weightFileTag,
                                                     'b1_1' + weightFileTag])
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

    if useLayers >= 3:
        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, None, None, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1, padsize=1, group=1,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W_2' + weightFileTag,
                                                     'b_2' + weightFileTag])
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

    if useLayers >= 4:
        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_3' + weightFileTag,
                                                     'W1_3' + weightFileTag,
                                                     'b0_3' + weightFileTag,
                                                     'b1_3' + weightFileTag])
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

    if useLayers >= 5:
        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_4' + weightFileTag,
                                                     'W1_4' + weightFileTag,
                                                     'b0_4' + weightFileTag,
                                                     'b1_4' + weightFileTag])
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

    if useLayers >= 6:
        fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096,
                            initWeights=initWeights, weightsDir=weightsDir,
                            weightFiles=['W_5' + weightFileTag,
                                         'b_5' + weightFileTag])
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type
        if testMode:
            dropout_layer6 = fc_layer6  # no dropout at test time
        else:
            dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 7:
        fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096,
                            initWeights=initWeights, weightsDir=weightsDir,
                            weightFiles=['W_6' + weightFileTag,
                                         'b_6' + weightFileTag])
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type
        if testMode:
            dropout_layer7 = fc_layer7  # no dropout at test time
        else:
            dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 8:
        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                      n_out=1000, initWeights=initWeights,
                                      weightsDir=weightsDir,
                                      weightFiles=['W_7' + weightFileTag,
                                                   'b_7' + weightFileTag])
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.output = self.layers[useLayers - 1]
    self.params = params
    self.x = x
    self.mean = mean
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.useLayers = useLayers
    self.outLayer = self.layers[useLayers - 1]

    meanVal = np.load(config['mean_file'])
    # x is 4d, with 'batch' number of images. meanVal has only '1' in the
    # 'batch' dimension, so subtraction won't work; tile it across the batch.
    meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
    meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
    self.meanVal = meanVal
    #meanVal = np.zeros([3,imgHeight,imgWidth,2], dtype='float32')

    if useLayers >= 8:
        # if the last layer is softmax, then its output is y_pred
        finalOut = self.outLayer.y_pred
    else:
        finalOut = self.outLayer.output
    self.forwardFunction = theano.function([self.x, In(self.mean, value=meanVal)],
                                           [finalOut])
def __init__(self, config):
    ModelBase.__init__(self)

    self.config = config
    self.verbose = self.config['verbose']
    self.name = 'alexnet'
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']
    n_softmax_out = config['n_softmax_out']

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.lvector('y')
    rand = T.fvector('rand')
    lr = T.scalar('lr')

    if self.verbose:
        print 'AlexNet 2/16'

    self.layers = []
    params = []
    weight_types = []

    if flag_datalayer:
        data_layer = DataLayer(input=x, image_shape=(3, 256, 256, batch_size),
                               cropsize=227, rand=rand, mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                    image_shape=(3, 227, 227, batch_size),
                                    filter_shape=(3, 11, 11, 96),
                                    convstride=4, padsize=0, group=1,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=True,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                    image_shape=(96, 27, 27, batch_size),
                                    filter_shape=(96, 5, 5, 256),
                                    convstride=1, padsize=2, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.1, lrn=True,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                    image_shape=(256, 13, 13, batch_size),
                                    filter_shape=(256, 3, 3, 384),
                                    convstride=1, padsize=1, group=1,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 384),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.1, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 256),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096,
                        verbose=self.verbose)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096,
                                  verbose=self.verbose)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096,
                        verbose=self.verbose)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096,
                                  verbose=self.verbose)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                  n_out=n_softmax_out, verbose=self.verbose)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.p_y_given_x = softmax_layer8.p_y_given_x
    self.y_pred = softmax_layer8.y_pred
    self.output = self.p_y_given_x

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.error = softmax_layer8.errors(y)
    if n_softmax_out < 5:
        self.error_top_5 = softmax_layer8.errors_top_x(y, n_softmax_out)
    else:
        self.error_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params

    # inputs
    self.x = x
    self.y = y
    self.rand = rand
    self.lr = lr
    self.shared_x = theano.shared(
        np.zeros((3, config['input_width'], config['input_height'],
                  config['file_batch_size']),  # for loading large batch
                 dtype=theano.config.floatX),
        borrow=True)
    self.shared_y = theano.shared(np.zeros((config['file_batch_size'],),
                                           dtype=int), borrow=True)
    self.shared_lr = theano.shared(np.float32(config['learning_rate']))

    # training related
    self.base_lr = np.float32(config['learning_rate'])
    self.step_idx = 0
    self.mu = config['momentum']       # def: 0.9 # momentum
    self.eta = config['weight_decay']  # 0.0002 # weight decay
    self.weight_types = weight_types
    self.batch_size = batch_size

    self.grads = T.grad(self.cost, self.params)

    subb_ind = T.iscalar('subb')  # sub batch index
    #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:
                                        (subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:
                                        (subb_ind + 1) * self.batch_size]
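The attributes stored above (grads, shared_lr, mu for momentum, eta for weight decay) are what a training function would consume. A hedged sketch of the classic momentum-plus-weight-decay SGD update they support, assuming model is an instance of this class (the repository's actual update rule may differ, for example in how weight and bias parameters are scaled):

import theano

def sgd_momentum_updates(model):
    # Classic momentum with L2 weight decay folded into the gradient.
    updates = []
    for param, grad in zip(model.params, model.grads):
        vel = theano.shared(param.get_value() * 0.,
                            broadcastable=param.broadcastable)
        new_vel = model.mu * vel - model.shared_lr * (grad + model.eta * param)
        updates.append((vel, new_vel))
        updates.append((param, param + new_vel))
    return updates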
def __init__(self, X, n_in, n_out, n_hidden_layers, n_units_in, n_units_hidden,
             M_lst=None, m_lst=None, sigma_W_params_lst=None,
             sigma_b_params_lst=None, sigma_W=1e-3, tune_sigma_W=True,
             sigma_b=1e-6, tune_sigma_b=True, l_W=1e-6, l_b=1e-6,
             diag_noise=True, approx_cols=False,
             divide_1st_layer_by_its_n_out=False, b_out_deterministic=False,
             seed=None):
    assert n_hidden_layers > 0, 'n_hidden_layers must be positive'
    n_layers = n_hidden_layers + 1

    M_lst = [None] * n_layers if M_lst is None else M_lst
    m_lst = [None] * n_layers if m_lst is None else m_lst
    if sigma_W_params_lst is None:
        sigma_W_params_lst = [None] * n_layers
    if sigma_b_params_lst is None:
        sigma_b_params_lst = [None] * n_layers

    assert \
        len(M_lst) == len(m_lst) == len(sigma_W_params_lst) == \
        len(sigma_b_params_lst) == n_layers, \
        'length of all lists must be the same and equal to ' \
        '(n_hidden_layers + 1) where the +1 is for the output layer mapping'

    # set seed to ensure each layer is init differently (cf. seed += 1)
    seed = np.random.randint(int(1e6)) if seed is None else seed
    np.random.seed(seed)

    def activation(x):
        return T.nnet.relu(x, alpha=0.1)

    self.in_layer = GaussLayer(
        input=X, n_in=n_in, n_out=n_units_in,
        M=M_lst[0], m=m_lst[0],
        sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
        sigma_W_params=sigma_W_params_lst[0],
        sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
        sigma_b_params=sigma_b_params_lst[0],
        l_W=l_W, l_b=l_b, diag_noise=diag_noise,
        activation=activation, approx_cols=approx_cols,
        seed=seed, name='h1'
    )
    self.layers = [self.in_layer]
    seed += 1

    # specific settings necessary for initialisation of deep GPs
    if divide_1st_layer_by_its_n_out:
        sqrt_n_out = T.constant(self.in_layer.n_out ** 0.5, dtype=floatX)
        self.in_layer.output /= sqrt_n_out

    # the first hidden layer was already set up above
    for i in xrange(1, n_hidden_layers):
        prev_layer = self.layers[-1]
        layer = GaussLayer(
            input=prev_layer.output, n_in=prev_layer.n_out, n_out=n_units_hidden,
            M=M_lst[i], m=m_lst[i],
            sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
            sigma_W_params=sigma_W_params_lst[i],
            sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
            sigma_b_params=sigma_b_params_lst[i],
            l_W=l_W, l_b=l_b, diag_noise=diag_noise,
            activation=activation, name='h' + str(i + 1),
            approx_cols=approx_cols, seed=seed
        )
        self.layers += [layer]
        seed += 1

    # initialised separately because of the necessary linear activation
    prev_layer = self.layers[-1]
    self.out_layer = GaussLayer(
        input=prev_layer.output, n_in=prev_layer.n_out, n_out=n_out,
        M=M_lst[-1], m=m_lst[-1],
        sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
        sigma_W_params=sigma_W_params_lst[-1],
        sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
        sigma_b_params=sigma_b_params_lst[-1],
        l_W=l_W, l_b=l_b, diag_noise=diag_noise,
        b_is_deterministic=b_out_deterministic,
        approx_cols=approx_cols, name='out', seed=seed
    )
    self.layers += [self.out_layer]

    self.softmax = SoftmaxLayer(input=self.out_layer.output, name='softmax')

    self.params = reduce(
        lambda x, y: x + y,
        [layer.grad_params for layer in self.layers]
    )

    self.input = X
    self.p_y_given_x = self.softmax.p_y_given_x
    self.y_pred = self.softmax.y_pred
    self.mean_log_likelihood = self.softmax.mean_log_likelihood
    self.errors = self.softmax.errors

    # self.kl_W = T.sum([layer.kl_W() for layer in self.layers])
    # self.kl_b = T.sum([layer.kl_b() for layer in self.layers])
    # self.kl = self.kl_W + self.kl_b
    self.effect_kl_W = T.sum([layer.effect_kl_W() for layer in self.layers])
    self.effect_kl_b = T.sum([layer.effect_kl_b() for layer in self.layers])
    self.effect_kl = self.effect_kl_W + self.effect_kl_b