Example #1
def add_layer(incoming, num_channels, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)

    # Bottleneck layer to reduce number of input channels to 4 times the number of output channels
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer,
                        num_filters=4 * num_channels,
                        filter_size=(1, 1),
                        stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None,
                        flip_filters=False,
                        nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)

    # Convolutional layer (using padding to keep same dimensions)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer,
                        num_filters=num_channels,
                        filter_size=(3, 3),
                        stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None,
                        pad='same',
                        flip_filters=False,
                        nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)

    # Concatenate the input filters with the new filters
    layer = ConcatLayer([incoming, layer], axis=1)

    return layer
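Example #7 below calls a build_block helper with exactly this looping pattern; here is a minimal hypothetical sketch of such a loop on top of add_layer (the helper name and shapes are assumptions consistent with that call, not part of the example above):

from lasagne.layers import InputLayer

def build_block(network, num_layers, growth_rate, dropout):
    # Hypothetical dense-block loop: each call to add_layer concatenates
    # growth_rate new feature maps onto the running feature stack.
    for _ in range(num_layers):
        network = add_layer(network, growth_rate, dropout)
    return network

# e.g. starting from a 64-channel feature map:
# network = InputLayer((None, 64, 56, 56))
# network = build_block(network, num_layers=6, growth_rate=32, dropout=0.2)
# network.output_shape  ->  (None, 64 + 6 * 32, 56, 56)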
Example #2
def ResidualModule(input_layer,
                   num_filters=64,
                   nonlinearity=rectify,
                   normalize=False,
                   stride=(1, 1),
                   conv_dropout=0.0):
    input_conv = Conv2DLayer(incoming=input_layer,
                             num_filters=num_filters,
                             filter_size=(3, 1),
                             stride=stride,
                             pad='same',
                             W=lasagne.init.GlorotUniform(),
                             nonlinearity=None,
                             b=None,
                             name='Residual module layer 1')
    l_prev = BatchNormalizeLayer(input_conv,
                                 normalize=normalize,
                                 nonlinearity=nonlinearity)
    l_prev = TiedDropoutLayer(l_prev, p=conv_dropout, name='Tied Dropout')

    l_prev = Conv2DLayer(incoming=l_prev,
                         num_filters=num_filters,
                         filter_size=(3, 1),
                         stride=(1, 1),
                         pad='same',
                         W=lasagne.init.GlorotUniform(),
                         nonlinearity=None,
                         b=None,
                         name='Residual module layer 2')
    if normalize:
        # Batch normalization is done "immediately after" convolutions
        l_prev = BatchNormLayer(l_prev, name='Batch norm')

    # Using 1x1 convolutions for shortcut projections. NiNLayer could be used
    # as well but doesn't support strides
    l_skip = Conv2DLayer(input_layer,
                         num_filters=num_filters,
                         filter_size=(1, 1),
                         stride=stride,
                         nonlinearity=None,
                         b=None,
                         name='Shortcut')
    l_prev = ElemwiseSumLayer((l_prev, l_skip), name='Elementwise sum')

    # Add nonlinearity after summation
    l_prev = NonlinearityLayer(l_prev,
                               nonlinearity=nonlinearity,
                               name='Non-linearity')
    if not normalize:
        l_prev = BiasLayer(l_prev, name='Bias')

    l_prev = TiedDropoutLayer(l_prev, p=conv_dropout, name='Tied Dropout')
    return l_prev
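A hedged usage sketch for the module above (shapes are assumptions for illustration; the (3, 1) filters suggest inputs laid out as (batch, channels, time, 1), and BatchNormalizeLayer / TiedDropoutLayer are the project helpers from the surrounding examples):

from lasagne.layers import InputLayer

# Hypothetical stacking of two residual modules; the second downsamples the
# time axis via its stride, and the 1x1 shortcut projection keeps the shapes
# of the two summed branches identical.
l_in = InputLayer((None, 1, 256, 1))
l = ResidualModule(l_in, num_filters=64, normalize=True, conv_dropout=0.1)
l = ResidualModule(l, num_filters=128, stride=(2, 1), normalize=True)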
Example #3
def dense_fast_block(network, transition=False, first=False, filters=16):
    if transition:
        network = NonlinearityLayer(BiasLayer(ScaleLayer(network)),
                                    nonlinearity=rectify)
        network = ConvLayer(network,
                            network.output_shape[1],
                            1,
                            pad='same',
                            W=he_norm,
                            b=None,
                            nonlinearity=None)
        network = BatchNormLayer(
            Pool2DLayer(network, 2, mode='average_inc_pad'))

    network = NonlinearityLayer(BiasLayer(ScaleLayer(network)),
                                nonlinearity=rectify)
    conv = ConvLayer(network,
                     filters,
                     3,
                     pad='same',
                     W=he_norm,
                     b=None,
                     nonlinearity=None)
    return ConcatLayer([network, BatchNormLayer(conv)], axis=1)
Example #4
def affine_relu_conv(network, channels, filter_size, dropout, name_prefix):
    network = ScaleLayer(network, name=name_prefix + '_scale')
    network = BiasLayer(network, name=name_prefix + '_shift')
    network = NonlinearityLayer(network,
                                nonlinearity=rectify,
                                name=name_prefix + '_relu')
    network = Conv2DLayer(network,
                          channels,
                          filter_size,
                          pad='same',
                          W=lasagne.init.HeNormal(gain='relu'),
                          b=None,
                          nonlinearity=None,
                          name=name_prefix + '_conv')
    if dropout:
        network = DropoutLayer(network, dropout)
    return network
Example #5
def add_transition(incoming, num_filters, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    # Reduce the number of filters
    layer = Conv2DLayer(layer,
                        num_filters=num_filters,
                        filter_size=(1, 1),
                        stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None,
                        flip_filters=False,
                        nonlinearity=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)
    # Pooling layer to reduce the last two dimensions by half
    layer = Pool2DLayer(layer,
                        pool_size=(2, 2),
                        stride=(2, 2),
                        mode='average_exc_pad')
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    return layer
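A quick shape check for the transition above (the input shape is an assumption): the 1x1 convolution sets the channel count and the 2x2 average pooling halves the spatial dimensions.

from lasagne.layers import InputLayer

# Hypothetical shape check: 128 channels at 32x32 compressed to 64 at 16x16.
l = InputLayer((None, 128, 32, 32))
l = add_transition(l, num_filters=64, dropout=0.2)
print(l.output_shape)   # (None, 64, 16, 16)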
Example #6
def BatchNormalizeLayer(l_prev, normalize=False, nonlinearity=rectify):
    """
    Batch normalise or add non-linearity and bias
    :param l_prev: input layer
    :param normalize: True or False
    :param nonlinearity: non-linearity to apply
    :return:
    """
    if normalize:
        # l_prev = NormalizeLayer(l_prev, alpha='single_pass')
        # l_prev = ScaleAndShiftLayer(l_prev)
        # l_prev = NonlinearityLayer(l_prev, nonlinearity=nonlinearity)
        l_prev = BatchNormLayer(l_prev, name='Batch norm')
        l_prev = NonlinearityLayer(l_prev,
                                   nonlinearity=nonlinearity,
                                   name='Non-linearity')
    else:
        l_prev = NonlinearityLayer(l_prev,
                                   nonlinearity=nonlinearity,
                                   name='Non-linearity')
        l_prev = BiasLayer(l_prev, name='Bias')
    return l_prev
Example #7
def build_densenet(
        input_var,
        input_shape=(None, 3, 224, 224),
        num_filters_init=64,
        growth_rate=32,
        dropout=0.2,
        num_classes=1000,
        stages=[6, 12, 24, 16]):
    if input_shape[2] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[2] must be a multiple of {}.".format(2 ** len(stages)))

    if input_shape[3] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[3] must be a multiple of {}.".format(2 ** len(stages)))

    # Input should be (BATCH_SIZE, NUM_CHANNELS, WIDTH, HEIGHT)
    # NUM_CHANNELS is usually 3 (R,G,B) and for the ImageNet example the width and height are 224
    network = InputLayer(input_shape, input_var)

    # Apply 2D convolutions with a 7x7 filter (pad by 3 on each side)
    # Because of the 2x2 stride the shape of the last two dimensions will be half the size of the input (112x112)
    network = Conv2DLayer(network,
                          num_filters=num_filters_init,
                          filter_size=(7, 7),
                          stride=(2, 2),
                          pad=(3, 3),
                          W=HeNormal(gain='relu'), b=None,
                          flip_filters=False,
                          nonlinearity=None)

    # Batch normalize
    network = BatchNormLayer(network, beta=None, gamma=None)

    # If dropout is enabled, apply after every convolutional and dense layer
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)

    # Apply ReLU
    network = NonlinearityLayer(network, nonlinearity=rectify)

    # Keep the maximum value of a 3x3 pool with a 2x2 stride
    # This operation again divides the size of the last two dimensions by two (56x56)
    network = MaxPool2DLayer(network,
                             pool_size=(3, 3),
                             stride=(2, 2),
                             pad=(1, 1))

    # Add dense blocks
    for i, num_layers in enumerate(stages):
        # Except for the first block, we add a transition layer before the dense block that halves the number of filters, width and height
        if i > 0:
            network = add_transition(network, math.floor(network.output_shape[1] / 2), dropout)
        network = build_block(network, num_layers, growth_rate, dropout)

    # Apply global pooling and add a fully connected layer with softmax function
    network = ScaleLayer(network)
    network = BiasLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = GlobalPoolLayer(network)
    network = DenseLayer(network,
                         num_units=num_classes,
                         W=HeNormal(gain=1),
                         nonlinearity=softmax)

    return network
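A hedged usage sketch for this builder, assuming Theano is available; with four stages the input width and height must be divisible by 2**4 = 16, which 224 satisfies.

import theano.tensor as T

# Illustrative call only; add_transition and build_block come from the
# surrounding examples.
input_var = T.tensor4('inputs')
network = build_densenet(input_var,
                         input_shape=(None, 3, 224, 224),
                         num_filters_init=64,
                         growth_rate=32,
                         dropout=0.2,
                         num_classes=1000,
                         stages=[6, 12, 24, 16])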
Example #8
def build_densenet(input_shape=(None, 3, 32, 32),
                   input_var=None,
                   classes=10,
                   depth=40,
                   first_output=16,
                   growth_rate=12,
                   num_blocks=3,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    input_shape : tuple
        The shape of the input layer, as ``(batchsize, channels, rows, cols)``.
        Any entry except ``channels`` can be ``None`` to indicate free size.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    classes : int
        The number of classes of the softmax output.
    depth : int
        Depth of the network. Must be ``num_blocks * n + 1`` for some ``n``.
        (Parameterizing by depth rather than n makes it easier to follow the
        paper.)
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 3, as in the original paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.

    References
    ----------
    .. [1] Gao Huang et al. (2016):
           Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """
    if (depth - 1) % num_blocks != 0:
        raise ValueError("depth must be num_blocks * n + 1 for some n")

    # input and initial convolution
    network = InputLayer(input_shape, input_var, name='input')
    network = Conv2DLayer(network,
                          first_output,
                          3,
                          pad='same',
                          W=lasagne.init.HeNormal(gain='relu'),
                          b=None,
                          nonlinearity=None,
                          name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)
    # dense blocks with transitions in between
    n = (depth - 1) // num_blocks
    for b in range(num_blocks):
        network = dense_block(network,
                              n - 1,
                              growth_rate,
                              dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network,
                                 dropout,
                                 name_prefix='block%d_trs' % (b + 1))
    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network,
                                nonlinearity=rectify,
                                name='post_relu')
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network,
                         classes,
                         nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1),
                         name='output')
    return network
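A hedged sketch of compiling a training function for this network with Theano; the optimizer and learning rate are illustrative choices, not part of the example.

import theano
import theano.tensor as T
import lasagne

# Illustrative CIFAR-10 style setup: depth=40 with num_blocks=3 satisfies
# the (depth - 1) % num_blocks == 0 check above.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = build_densenet(input_var=input_var, classes=10, depth=40)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], loss, updates=updates)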
Example #9
def build_autoencoder(layer, nonlinearity='same', b=init.Constant(0.)):
    """
    Unfolds a stack of layers into a symmetric autoencoder with tied weights.
    Given a :class:`Layer` instance, this function builds a
    symmetric autoencoder with tied weights.
    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which a symmetric
        autoencoder is built.
    nonlinearity : 'same', list, callable, or None
        The nonlinearities that are applied to the decoding layer.
        If 'same', each decoder layer has the same nonlinearity as its
        corresponding encoder layer. If a list is provided, it must contain
        nonlinearities for each decoding layer. Otherwise, if a single
        nonlinearity is provided, it is applied to all decoder layers.
        If set to ``None``, all nonlinearities for the decoder layers are set
        to lasagne.nonlinearities.identity.
    b : callable, Theano shared variable, numpy array, list or None
        An initializer for the decoder biases. By default, all decoder
        biases are initialized to lasagne.init.Constant(0.). If a shared
        variable or a numpy array is provided, the shape must match the
        incoming shape (only in case all incoming shapes are the same).
        Additionally, a list containing initializers for the biases of each
        decoder layer can be provided. If set to ``None``, the decoder
        layers will have no biases, and pass through their input instead.
    Returns
    -------
    layer: :class:`Layer` instance
       The output :class:`Layer` of the symmetric autoencoder with
       tied weights.
    encoder: :class:`Layer` instance
       The code :class:`Layer` of the autoencoder (see Notes)
    Notes
    -----
    The encoder (input) :class:`Layer` is changed using
    `unfold_bias_and_nonlinearity_layers`. Therefore, this layer is not the
    code layer anymore, because it has got its bias and nonlinearity stripped
    off.
    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import build_autoencoder
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> l2 = DenseLayer(l1, num_units=10)
    >>> l_ae, l2 = build_autoencoder(l2, nonlinearity='same', b=None)
    """

    if isinstance(nonlinearity, (tuple, list)):
        n_idx = 0

    if isinstance(b, (tuple, list)):
        b_idx = 0

    encoder = unfold_bias_and_nonlinearity_layers(layer)
    layers = get_all_layers(encoder)
    autoencoder_layers = [encoder]

    kwargs_b = dict(b=None)
    kwargs_n = dict(nonlinearity=nonlinearities.identity)
    for i, layer in enumerate(layers[::-1]):

        incoming = autoencoder_layers[-1]
        if isinstance(layer, InputLayer):
            continue
        elif isinstance(layer, BiasLayer):
            if b is None:
                kwargs_b = dict(b=None)
            elif isinstance(b, (tuple, list)):
                kwargs_b = dict(b=b[b_idx])
                b_idx += 1
            else:
                kwargs_b = dict(b=b)
        elif isinstance(layer, NonlinearityLayer):
            if nonlinearity == 'same':
                kwargs_n = dict(nonlinearity=layer.nonlinearity)
            elif nonlinearity is None:
                kwargs_n = dict(nonlinearity=nonlinearities.identity)
            elif isinstance(nonlinearity, (tuple, list)):
                kwargs_n = dict(nonlinearity=nonlinearity[n_idx])
                n_idx += 1
            else:
                kwargs_n = dict(nonlinearity=nonlinearity)
        elif isinstance(layer, DropoutLayer):
            a_layer = DropoutLayer(incoming=incoming,
                                   p=layer.p,
                                   rescale=layer.rescale)
            autoencoder_layers.append(a_layer)
        elif isinstance(layer, GaussianNoiseLayer):
            a_layer = GaussianNoiseLayer(incoming=incoming, sigma=layer.sigma)
            autoencoder_layers.append(a_layer)
        else:
            a_layer = InverseLayer(incoming=incoming, layer=layer)
            if hasattr(layer, 'b'):
                a_layer = BiasLayer(incoming=a_layer, **kwargs_b)
            if hasattr(layer, 'nonlinearity'):
                a_layer = NonlinearityLayer(incoming=a_layer, **kwargs_n)
            autoencoder_layers.append(a_layer)

    return autoencoder_layers[-1], encoder
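Building on the docstring example above, a hedged sketch of training the resulting tied-weight autoencoder with a squared-error reconstruction loss (optimizer and sizes are illustrative):

import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer, get_output, get_all_params

# Encoder as in the docstring example, then unfold it into an autoencoder.
l_in = InputLayer((None, 20))
l_code = DenseLayer(l_in, num_units=10)
l_ae, l_code = build_autoencoder(l_code, nonlinearity='same', b=None)

x = T.matrix('x')
reconstruction = get_output(l_ae, x)
loss = lasagne.objectives.squared_error(reconstruction, x).mean()
params = get_all_params(l_ae, trainable=True)
updates = lasagne.updates.adam(loss, params)
train_fn = theano.function([x], loss, updates=updates)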
Example #10
def unfold_bias_and_nonlinearity_layers(layer):
    """
    Unfolds a stack of layers adding :class:`BiasLayer` and
    :class:`NonlinearityLayer` when needed.
    Given a :class:`Layer` instance representing a stacked network,
    this function inserts a :class:`BiasLayer` instance and/or a
    :class:`NonlinearityLayer` instance after each layer that has a
    ``b`` (bias) and/or ``nonlinearity`` attribute, carrying over the same
    bias and nonlinearity, while deleting the bias from and/or setting the
    nonlinearity of the original layer to the identity function.
    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which the new
        stacked neural network with added :class:`BiasLayer` and
        :class:`NonlinearityLayer` instances is built.
    Returns
    -------
    layer: :class:`Layer` instance
        The output :class:`Layer` of the unfolded network, with bias and
        nonlinearity layers added as separate layers.
    Examples
    --------
    >>> import lasagne
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import BiasLayer, NonlinearityLayer
    >>> from lasagne.layers import unfold_bias_and_nonlinearity_layers
    >>> from lasagne.layers import get_all_layers
    >>> from lasagne.nonlinearities import tanh, sigmoid, identity
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50, nonlinearity=tanh)
    >>> l_out = DenseLayer(l1, num_units=10, nonlinearity=sigmoid)
    >>> l_out = unfold_bias_and_nonlinearity_layers(l_out)
    >>> all_layer_names = [l.__class__.__name__ for l in get_all_layers(l_out)]
    >>> all_layer_names[:4]
    ['InputLayer', 'DenseLayer', 'BiasLayer', 'NonlinearityLayer']
    >>> all_layer_names[4:]
    ['DenseLayer', 'BiasLayer', 'NonlinearityLayer']
    """
    layers = get_all_layers(layer)
    incoming = layers[0]

    for ii, layer in enumerate(layers[1:]):
        layer.input_layer = incoming
        # Check if the layer has a bias
        b = getattr(layer, 'b', None)
        add_bias = False
        # Check if the layer has a nonlinearity
        nonlinearity = getattr(layer, 'nonlinearity', None)
        add_nonlinearity = False
        if b is not None and not isinstance(layer, BiasLayer):
            layer.b = None
            del layer.params[b]
            add_bias = True
        if (nonlinearity is not None
                and not isinstance(layer, NonlinearityLayer)
                and nonlinearity != nonlinearities.identity):
            layer.nonlinearity = nonlinearities.identity
            add_nonlinearity = True

        if add_bias:
            layer = BiasLayer(incoming=layer, b=b)
        if add_nonlinearity:
            layer = NonlinearityLayer(incoming=layer,
                                      nonlinearity=nonlinearity)
        incoming = layer
    return layer
Example #11
def batchnorm_pt2(incoming):
    """2nd part of batch normalization: scaling + biases."""
    return BiasLayer(ScaleLayer(incoming))
Example #12
def build_decoder(net):
    net['uconv5_3'] = ConvLayer(net['conv5_3'], 512, 3, pad=1)
    print("uconv5_3: {}".format(net['uconv5_3'].output_shape[1:]))

    net['uconv5_2'] = ConvLayer(net['uconv5_3'], 512, 3, pad=1)
    print("uconv5_2: {}".format(net['uconv5_2'].output_shape[1:]))

    net['uconv5_1'] = ConvLayer(net['uconv5_2'], 512, 3, pad=1)
    print("uconv5_1: {}".format(net['uconv5_1'].output_shape[1:]))

    net['upool4'] = Upscale2DLayer(net['uconv5_1'], scale_factor=2)
    print("upool4: {}".format(net['upool4'].output_shape[1:]))

    net['uconv4_3'] = ConvLayer(net['upool4'], 512, 3, pad=1)
    print("uconv4_3: {}".format(net['uconv4_3'].output_shape[1:]))

    net['uconv4_2'] = ConvLayer(net['uconv4_3'], 512, 3, pad=1)
    print("uconv4_2: {}".format(net['uconv4_2'].output_shape[1:]))

    net['uconv4_1'] = ConvLayer(net['uconv4_2'], 512, 3, pad=1)
    print("uconv4_1: {}".format(net['uconv4_1'].output_shape[1:]))

    net['upool3'] = Upscale2DLayer(net['uconv4_1'], scale_factor=2)
    print("upool3: {}".format(net['upool3'].output_shape[1:]))

    net['uconv3_3'] = ConvLayer(net['upool3'], 256, 3, pad=1)
    print("uconv3_3: {}".format(net['uconv3_3'].output_shape[1:]))

    net['uconv3_2'] = ConvLayer(net['uconv3_3'], 256, 3, pad=1)
    print("uconv3_2: {}".format(net['uconv3_2'].output_shape[1:]))

    net['uconv3_1'] = ConvLayer(net['uconv3_2'], 256, 3, pad=1)
    print("uconv3_1: {}".format(net['uconv3_1'].output_shape[1:]))

    net['upool2'] = Upscale2DLayer(net['uconv3_1'], scale_factor=2)
    print("upool2: {}".format(net['upool2'].output_shape[1:]))

    net['uconv2_2'] = ConvLayer(net['upool2'], 128, 3, pad=1)
    print("uconv2_2: {}".format(net['uconv2_2'].output_shape[1:]))

    net['uconv2_1'] = ConvLayer(net['uconv2_2'], 128, 3, pad=1)
    print("uconv2_1: {}".format(net['uconv2_1'].output_shape[1:]))

    net['upool1'] = Upscale2DLayer(net['uconv2_1'], scale_factor=2)
    print("upool1: {}".format(net['upool1'].output_shape[1:]))

    net['uconv1_2'] = ConvLayer(net['upool1'], 64, 3, pad=1)
    print("uconv1_2: {}".format(net['uconv1_2'].output_shape[1:]))

    net['uconv1_1'] = ConvLayer(net['uconv1_2'], 64, 3, pad=1)
    print("uconv1_1: {}".format(net['uconv1_1'].output_shape[1:]))

    net['output_encoder'] = ConvLayer(net['uconv1_1'],
                                      3,
                                      1,
                                      pad=0,
                                      nonlinearity=tanh)
    print("output_encoder: {}".format(net['output_encoder'].output_shape[1:]))

    net['output_encoder_bias'] = BiasLayer(net['output_encoder'],
                                           b=lasagne.init.Constant(1))
    print("output_encoder_bias: {}".format(
        net['output_encoder_bias'].output_shape[1:]))
    net['output_encoder_scaled'] = ScaleLayer(
        net['output_encoder_bias'], scales=lasagne.init.Constant(127.5))
    print("output_encoder_scaled: {}".format(
        net['output_encoder_scaled'].output_shape[1:]))

    return net
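The final bias and scale layers map the tanh output from [-1, 1] into pixel range; a tiny check of that arithmetic (NumPy used only for illustration):

import numpy as np

# tanh in [-1, 1]  ->  +1 bias gives [0, 2]  ->  *127.5 gives [0, 255]
t = np.array([-1.0, 0.0, 1.0])
print((t + 1.0) * 127.5)   # [   0.   127.5  255. ]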
Example #13
File: j22.py Project: HKCaesar/plnt
def build_densenet(l_in,
                   input_var=None,
                   first_output=64,
                   growth_rate=32,
                   num_blocks=4,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.
    Parameters
    ----------
    l_in : Layer instance
        The layer to build the network on top of, typically an
        ``InputLayer`` shaped as ``(batchsize, channels, rows, cols)``.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 4, as in the ImageNet models of
        the paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.
    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.
    References
    ----------
    .. [1] Gao Huang et al. (2016):
           Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """

    nb_layers = [6, 12, 32, 32]  # For DenseNet-169
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121
    # initial convolution
    network = Conv2DLayer(l_in,
                          first_output,
                          filter_size=7,
                          stride=2,
                          pad='same',
                          W=lasagne.init.HeNormal(gain='relu'),
                          b=None,
                          nonlinearity=None,
                          name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    network = ScaleLayer(network, name='pre_scale')
    network = BiasLayer(network, name='pre_shift')
    network = dnn.MaxPool2DDNNLayer(network, pool_size=3, stride=2)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)
    # dense blocks with transitions in between

    for b in range(num_blocks):
        network = dense_block(network,
                              nb_layers[b],
                              growth_rate,
                              dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network,
                                 dropout,
                                 name_prefix='block%d_trs' % (b + 1))
    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network,
                                nonlinearity=rectify,
                                name='post_relu')

    return network
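This builder returns the feature extractor only (it stops after the final ReLU); a hedged sketch of attaching a pooling and softmax head in the style of Example #8 (the helper name and usage are assumptions, not part of the project):

import lasagne
from lasagne.layers import GlobalPoolLayer, DenseLayer
from lasagne.nonlinearities import softmax

def add_classifier_head(network, classes):
    # Hypothetical classification head appended to the returned network,
    # mirroring the post-processing used in Example #8.
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network,
                         classes,
                         nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1),
                         name='output')
    return network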