def add_layer(incoming, num_channels, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)
    # Bottleneck layer to reduce number of input channels to 4 times the number of output channels
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer, num_filters=4 * num_channels, filter_size=(1, 1), stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None, flip_filters=False, nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)
    # Convolutional layer (using padding to keep same dimensions)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer, num_filters=num_channels, filter_size=(3, 3), stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None, pad='same', flip_filters=False,
                        nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)
    # Concatenate the input filters with the new filters
    layer = ConcatLayer([incoming, layer], axis=1)
    return layer
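# `build_block` is referenced by the ImageNet-style `build_densenet` further below but is
# not included in this collection. A minimal sketch, assuming it simply chains `add_layer`
# (defined above) so that each layer appends `growth_rate` new feature maps to the block:
def build_block(incoming, num_layers, growth_rate, dropout):
    layer = incoming
    for _ in range(num_layers):
        # each call concatenates growth_rate new feature maps onto the running block
        layer = add_layer(layer, growth_rate, dropout)
    return layer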
def ResidualModule(input_layer, num_filters=64, nonlinearity=rectify, normalize=False,
                   stride=(1, 1), conv_dropout=0.0):
    input_conv = Conv2DLayer(incoming=input_layer, num_filters=num_filters, filter_size=(3, 1),
                             stride=stride, pad='same', W=lasagne.init.GlorotUniform(),
                             nonlinearity=None, b=None, name='Residual module layer 1')
    l_prev = BatchNormalizeLayer(input_conv, normalize=normalize, nonlinearity=nonlinearity)
    l_prev = TiedDropoutLayer(l_prev, p=conv_dropout, name='Tied Dropout')
    l_prev = Conv2DLayer(incoming=l_prev, num_filters=num_filters, filter_size=(3, 1),
                         stride=(1, 1), pad='same', W=lasagne.init.GlorotUniform(),
                         nonlinearity=None, b=None, name='Residual module layer 2')
    if normalize:
        # Batch normalization is done "immediately after" convolutions
        l_prev = BatchNormLayer(l_prev, name='Batch norm')
    # Use 1x1 convolutions for the shortcut projections. NiNLayer could be used as well,
    # but it doesn't support strides
    l_skip = Conv2DLayer(input_layer, num_filters=num_filters, filter_size=(1, 1), stride=stride,
                         nonlinearity=None, b=None, name='Shortcut')
    l_prev = ElemwiseSumLayer((l_prev, l_skip), name='Elementwise sum')
    # Add the nonlinearity after the summation
    l_prev = NonlinearityLayer(l_prev, nonlinearity=nonlinearity, name='Non-linearity')
    if not normalize:
        l_prev = BiasLayer(l_prev, name='Bias')
    l_prev = TiedDropoutLayer(l_prev, p=conv_dropout, name='Tied Dropout')
    return l_prev
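# Hypothetical usage sketch (not part of the original code): stacks two of the residual
# modules above on a 1D-signal-shaped input, downsampling in the second module via a
# strided convolution. The input shape and dropout rate are illustrative assumptions.
def _residual_stack_example():
    l_in = InputLayer((None, 1, 400, 1))
    l_prev = ResidualModule(l_in, num_filters=64, normalize=True, conv_dropout=0.1)
    l_prev = ResidualModule(l_prev, num_filters=128, normalize=True,
                            stride=(2, 1), conv_dropout=0.1)
    return l_prev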
def dense_fast_block(network, transition=False, first=False, filters=16):
    if transition:
        network = NonlinearityLayer(BiasLayer(ScaleLayer(network)), nonlinearity=rectify)
        network = ConvLayer(network, network.output_shape[1], 1, pad='same',
                            W=he_norm, b=None, nonlinearity=None)
        network = BatchNormLayer(Pool2DLayer(network, 2, mode='average_inc_pad'))
    network = NonlinearityLayer(BiasLayer(ScaleLayer(network)), nonlinearity=rectify)
    conv = ConvLayer(network, filters, 3, pad='same', W=he_norm, b=None, nonlinearity=None)
    return ConcatLayer([network, BatchNormLayer(conv)], axis=1)
def affine_relu_conv(network, channels, filter_size, dropout, name_prefix):
    network = ScaleLayer(network, name=name_prefix + '_scale')
    network = BiasLayer(network, name=name_prefix + '_shift')
    network = NonlinearityLayer(network, nonlinearity=rectify, name=name_prefix + '_relu')
    network = Conv2DLayer(network, channels, filter_size, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'), b=None,
                          nonlinearity=None, name=name_prefix + '_conv')
    if dropout:
        network = DropoutLayer(network, dropout)
    return network
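# `dense_block` and `transition` are called by the `build_densenet` variants below but are
# not reproduced in this collection. Minimal sketches, assuming they reuse `affine_relu_conv`
# above with the same scale/shift-ReLU-conv ordering: each block layer appends `growth_rate`
# feature maps via concatenation, and each transition halves the spatial resolution with
# average pooling. Layer names follow the `name_prefix` convention used elsewhere here.
def dense_block(network, num_layers, growth_rate, dropout, name_prefix):
    for n in range(num_layers):
        conv = affine_relu_conv(network, channels=growth_rate, filter_size=3,
                                dropout=dropout,
                                name_prefix=name_prefix + '_l%02d' % (n + 1))
        conv = BatchNormLayer(conv, beta=None, gamma=None,
                              name=name_prefix + '_l%02d_bn' % (n + 1))
        network = ConcatLayer([network, conv], axis=1,
                              name=name_prefix + '_l%02d_join' % (n + 1))
    return network


def transition(network, dropout, name_prefix):
    # 1x1 convolution keeping the channel count, followed by 2x2 average pooling
    network = affine_relu_conv(network, channels=network.output_shape[1],
                               filter_size=1, dropout=dropout,
                               name_prefix=name_prefix)
    network = Pool2DLayer(network, 2, mode='average_inc_pad',
                          name=name_prefix + '_pool')
    network = BatchNormLayer(network, beta=None, gamma=None,
                             name=name_prefix + '_bn')
    return network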
def add_transition(incoming, num_filters, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    # Reduce the number of filters
    layer = Conv2DLayer(layer, num_filters=num_filters, filter_size=(1, 1), stride=(1, 1),
                        W=HeNormal(gain='relu'), b=None, flip_filters=False, nonlinearity=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)
    # Pooling layer to reduce the last two dimensions by half
    layer = Pool2DLayer(layer, pool_size=(2, 2), stride=(2, 2), mode='average_exc_pad')
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    return layer
def BatchNormalizeLayer(l_prev, normalize=False, nonlinearity=rectify):
    """
    Batch normalise, or add a non-linearity and a bias
    :param l_prev: input layer
    :param normalize: True or False
    :param nonlinearity: non-linearity to apply
    :return: the resulting layer
    """
    if normalize:
        # l_prev = NormalizeLayer(l_prev, alpha='single_pass')
        # l_prev = ScaleAndShiftLayer(l_prev)
        # l_prev = NonlinearityLayer(l_prev, nonlinearity=nonlinearity)
        l_prev = BatchNormLayer(l_prev, name='Batch norm')
        l_prev = NonlinearityLayer(l_prev, nonlinearity=nonlinearity, name='Non-linearity')
    else:
        l_prev = NonlinearityLayer(l_prev, nonlinearity=nonlinearity, name='Non-linearity')
        l_prev = BiasLayer(l_prev, name='Bias')
    return l_prev
def build_densenet(input_var, input_shape=(None, 3, 224, 224), num_filters_init=64,
                   growth_rate=32, dropout=0.2, num_classes=1000,
                   stages=[6, 12, 24, 16]):

    if input_shape[2] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[2] must be a multiple of {}.".format(2 ** len(stages)))

    if input_shape[3] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[3] must be a multiple of {}.".format(2 ** len(stages)))

    # Input should be (BATCH_SIZE, NUM_CHANNELS, WIDTH, HEIGHT)
    # NUM_CHANNELS is usually 3 (R, G, B) and for the ImageNet example the width and height are 224
    network = InputLayer(input_shape, input_var)

    # Apply a 2D convolution with a 7x7 filter (pad by 3 on each side)
    # Because of the 2x2 stride, the last two dimensions will be half the size of the input (112x112)
    network = Conv2DLayer(network, num_filters=num_filters_init, filter_size=(7, 7),
                          stride=(2, 2), pad=(3, 3), W=HeNormal(gain='relu'), b=None,
                          flip_filters=False, nonlinearity=None)

    # Batch normalize
    network = BatchNormLayer(network, beta=None, gamma=None)

    # If dropout is enabled, apply it after every convolutional and dense layer
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)

    # Apply ReLU
    network = NonlinearityLayer(network, nonlinearity=rectify)

    # Keep the maximum value of a 3x3 pool with a 2x2 stride
    # This operation again divides the size of the last two dimensions by two (56x56)
    network = MaxPool2DLayer(network, pool_size=(3, 3), stride=(2, 2), pad=(1, 1))

    # Add dense blocks
    for i, num_layers in enumerate(stages):
        # Except for the first block, we add a transition layer before the dense block
        # that halves the number of filters, the width and the height
        if i > 0:
            network = add_transition(network, math.floor(network.output_shape[1] / 2), dropout)
        network = build_block(network, num_layers, growth_rate, dropout)

    # Apply global pooling and add a fully connected layer with a softmax function
    network = ScaleLayer(network)
    network = BiasLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = GlobalPoolLayer(network)
    network = DenseLayer(network, num_units=num_classes, W=HeNormal(gain=1), nonlinearity=softmax)

    return network
def build_densenet(input_shape=(None, 3, 32, 32), input_var=None, classes=10,
                   depth=40, first_output=16, growth_rate=12, num_blocks=3,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    input_shape : tuple
        The shape of the input layer, as ``(batchsize, channels, rows, cols)``.
        Any entry except ``channels`` can be ``None`` to indicate free size.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    classes : int
        The number of classes of the softmax output.
    depth : int
        Depth of the network. Must be ``num_blocks * n + 1`` for some ``n``.
        (Parameterizing by depth rather than n makes it easier to follow the
        paper.)
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 3, as in the original paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.

    References
    ----------
    .. [1] Gao Huang et al. (2016):
           Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """
    if (depth - 1) % num_blocks != 0:
        raise ValueError("depth must be num_blocks * n + 1 for some n")

    # input and initial convolution
    network = InputLayer(input_shape, input_var, name='input')
    network = Conv2DLayer(network, first_output, 3, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'), b=None,
                          nonlinearity=None, name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    n = (depth - 1) // num_blocks
    for b in range(num_blocks):
        network = dense_block(network, n - 1, growth_rate, dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network, dropout,
                                 name_prefix='block%d_trs' % (b + 1))

    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='post_relu')
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network, classes, nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1), name='output')
    return network
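# Hypothetical training-setup sketch (not part of the original code): builds the
# CIFAR-sized DenseNet defined immediately above and compiles a Theano training
# function with Nesterov momentum. Variable names and hyperparameters are
# illustrative only.
def _compile_densenet_example():
    import theano
    import theano.tensor as T
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = build_densenet(input_shape=(None, 3, 32, 32), input_var=input_var,
                             classes=10, depth=40, growth_rate=12, dropout=0)
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=0.1, momentum=0.9)
    # returns a function mapping (inputs, targets) -> training loss
    return theano.function([input_var, target_var], loss, updates=updates)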
def build_autoencoder(layer, nonlinearity='same', b=init.Constant(0.)):
    """
    Unfolds a stack of layers into a symmetric autoencoder with tied weights.

    Given a :class:`Layer` instance, this function builds a symmetric
    autoencoder with tied weights.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which a symmetric
        autoencoder is built.
    nonlinearity : 'same', list, callable, or None
        The nonlinearities that are applied to the decoding layer.
        If 'same', each decoder layer has the same nonlinearity as its
        corresponding encoder layer. If a list is provided, it must contain
        nonlinearities for each decoding layer. Otherwise, if a single
        nonlinearity is provided, it is applied to all decoder layers.
        If set to ``None``, all nonlinearities for the decoder layers are set
        to lasagne.nonlinearities.identity.
    b : callable, Theano shared variable, numpy array, list or None
        An initializer for the decoder biases. By default, all decoder biases
        are initialized to lasagne.init.Constant(0.). If a shared variable or
        a numpy array is provided, the shape must match the incoming shape
        (only in case all incoming shapes are the same). Additionally, a list
        containing initializers for the biases of each decoder layer can be
        provided. If set to ``None``, the decoder layers will have no biases,
        and pass through their input instead.

    Returns
    -------
    layer : :class:`Layer` instance
        The output :class:`Layer` of the symmetric autoencoder with tied
        weights.
    encoder : :class:`Layer` instance
        The code :class:`Layer` of the autoencoder (see Notes).

    Notes
    -----
    The encoder (input) :class:`Layer` is changed using
    `unfold_bias_and_nonlinearity_layers`. Therefore, this layer is not the
    code layer anymore, because it has got its bias and nonlinearity stripped
    off.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import build_autoencoder
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> l2 = DenseLayer(l1, num_units=10)
    >>> l_ae, l2 = build_autoencoder(l2, nonlinearity='same', b=None)
    """
    if isinstance(nonlinearity, (tuple, list)):
        n_idx = 0
    if isinstance(b, (tuple, list)):
        b_idx = 0
    encoder = unfold_bias_and_nonlinearity_layers(layer)
    layers = get_all_layers(encoder)
    autoencoder_layers = [encoder]
    kwargs_b = dict(b=None)
    kwargs_n = dict(nonlinearity=nonlinearities.identity)
    for i, layer in enumerate(layers[::-1]):
        incoming = autoencoder_layers[-1]
        if isinstance(layer, InputLayer):
            continue
        elif isinstance(layer, BiasLayer):
            if b is None:
                kwargs_b = dict(b=None)
            elif isinstance(b, (tuple, list)):
                kwargs_b = dict(b=b[b_idx])
                b_idx += 1
            else:
                kwargs_b = dict(b=b)
        elif isinstance(layer, NonlinearityLayer):
            if nonlinearity == 'same':
                kwargs_n = dict(nonlinearity=layer.nonlinearity)
            elif nonlinearity is None:
                kwargs_n = dict(nonlinearity=nonlinearities.identity)
            elif isinstance(nonlinearity, (tuple, list)):
                kwargs_n = dict(nonlinearity=nonlinearity[n_idx])
                n_idx += 1
            else:
                kwargs_n = dict(nonlinearity=nonlinearity)
        elif isinstance(layer, DropoutLayer):
            a_layer = DropoutLayer(incoming=incoming, p=layer.p, rescale=layer.rescale)
            autoencoder_layers.append(a_layer)
        elif isinstance(layer, GaussianNoiseLayer):
            a_layer = GaussianNoiseLayer(incoming=incoming, sigma=layer.sigma)
            autoencoder_layers.append(a_layer)
        else:
            a_layer = InverseLayer(incoming=incoming, layer=layer)
            if hasattr(layer, 'b'):
                a_layer = BiasLayer(incoming=a_layer, **kwargs_b)
            if hasattr(layer, 'nonlinearity'):
                a_layer = NonlinearityLayer(incoming=a_layer, **kwargs_n)
            autoencoder_layers.append(a_layer)
    return autoencoder_layers[-1], encoder
def unfold_bias_and_nonlinearity_layers(layer):
    """
    Unfolds a stack of layers adding :class:`BiasLayer` and
    :class:`NonlinearityLayer` when needed.

    Given a :class:`Layer` instance representing a stacked network, this
    function adds a :class:`BiasLayer` instance and/or a
    :class:`NonlinearityLayer` instance in between each layer with attributes
    b (bias) and/or nonlinearity, with the same bias and nonlinearity, while
    deleting the bias and/or setting the nonlinearity of the original layer
    to the identity function.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which the new stacked
        network with added :class:`BiasLayer` and :class:`NonlinearityLayer`
        instances is built.

    Returns
    -------
    layer : :class:`Layer` instance
        The output :class:`Layer` of the unfolded network.

    Examples
    --------
    >>> import lasagne
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import BiasLayer, NonlinearityLayer
    >>> from lasagne.layers import unfold_bias_and_nonlinearity_layers
    >>> from lasagne.layers import get_all_layers
    >>> from lasagne.nonlinearities import tanh, sigmoid, identity
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50, nonlinearity=tanh)
    >>> l_out = DenseLayer(l1, num_units=10, nonlinearity=sigmoid)
    >>> l_out = unfold_bias_and_nonlinearity_layers(l_out)
    >>> all_layer_names = [l.__class__.__name__ for l in get_all_layers(l_out)]
    >>> all_layer_names[:4]
    ['InputLayer', 'DenseLayer', 'BiasLayer', 'NonlinearityLayer']
    >>> all_layer_names[4:]
    ['DenseLayer', 'BiasLayer', 'NonlinearityLayer']
    """
    layers = get_all_layers(layer)
    incoming = layers[0]
    for ii, layer in enumerate(layers[1:]):
        layer.input_layer = incoming
        # Check if the layer has a bias
        b = getattr(layer, 'b', None)
        add_bias = False
        # Check if the layer has a nonlinearity
        nonlinearity = getattr(layer, 'nonlinearity', None)
        add_nonlinearity = False
        if b is not None and not isinstance(layer, BiasLayer):
            layer.b = None
            del layer.params[b]
            add_bias = True
        if (nonlinearity is not None and
                not isinstance(layer, NonlinearityLayer) and
                nonlinearity != nonlinearities.identity):
            layer.nonlinearity = nonlinearities.identity
            add_nonlinearity = True
        if add_bias:
            layer = BiasLayer(incoming=layer, b=b)
        if add_nonlinearity:
            layer = NonlinearityLayer(incoming=layer, nonlinearity=nonlinearity)
        incoming = layer
    return layer
def batchnorm_pt2(incoming):
    """2nd part of batch normalization: scaling + biases."""
    return BiasLayer(ScaleLayer(incoming))
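# Hypothetical usage note (not part of the original code): batchnorm_pt2 supplies the
# learnable scale (gamma) and shift (beta) that are disabled when a BatchNormLayer is
# created with beta=None, gamma=None, e.g.:
#     layer = BatchNormLayer(layer, beta=None, gamma=None)  # normalization only
#     layer = batchnorm_pt2(layer)                          # affine transform applied separately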
def build_decoder(net):
    net['uconv5_3'] = ConvLayer(net['conv5_3'], 512, 3, pad=1)
    print "uconv5_3: {}".format(net['uconv5_3'].output_shape[1:])
    net['uconv5_2'] = ConvLayer(net['uconv5_3'], 512, 3, pad=1)
    print "uconv5_2: {}".format(net['uconv5_2'].output_shape[1:])
    net['uconv5_1'] = ConvLayer(net['uconv5_2'], 512, 3, pad=1)
    print "uconv5_1: {}".format(net['uconv5_1'].output_shape[1:])
    net['upool4'] = Upscale2DLayer(net['uconv5_1'], scale_factor=2)
    print "upool4: {}".format(net['upool4'].output_shape[1:])
    net['uconv4_3'] = ConvLayer(net['upool4'], 512, 3, pad=1)
    print "uconv4_3: {}".format(net['uconv4_3'].output_shape[1:])
    net['uconv4_2'] = ConvLayer(net['uconv4_3'], 512, 3, pad=1)
    print "uconv4_2: {}".format(net['uconv4_2'].output_shape[1:])
    net['uconv4_1'] = ConvLayer(net['uconv4_2'], 512, 3, pad=1)
    print "uconv4_1: {}".format(net['uconv4_1'].output_shape[1:])
    net['upool3'] = Upscale2DLayer(net['uconv4_1'], scale_factor=2)
    print "upool3: {}".format(net['upool3'].output_shape[1:])
    net['uconv3_3'] = ConvLayer(net['upool3'], 256, 3, pad=1)
    print "uconv3_3: {}".format(net['uconv3_3'].output_shape[1:])
    net['uconv3_2'] = ConvLayer(net['uconv3_3'], 256, 3, pad=1)
    print "uconv3_2: {}".format(net['uconv3_2'].output_shape[1:])
    net['uconv3_1'] = ConvLayer(net['uconv3_2'], 256, 3, pad=1)
    print "uconv3_1: {}".format(net['uconv3_1'].output_shape[1:])
    net['upool2'] = Upscale2DLayer(net['uconv3_1'], scale_factor=2)
    print "upool2: {}".format(net['upool2'].output_shape[1:])
    net['uconv2_2'] = ConvLayer(net['upool2'], 128, 3, pad=1)
    print "uconv2_2: {}".format(net['uconv2_2'].output_shape[1:])
    net['uconv2_1'] = ConvLayer(net['uconv2_2'], 128, 3, pad=1)
    print "uconv2_1: {}".format(net['uconv2_1'].output_shape[1:])
    net['upool1'] = Upscale2DLayer(net['uconv2_1'], scale_factor=2)
    print "upool1: {}".format(net['upool1'].output_shape[1:])
    net['uconv1_2'] = ConvLayer(net['upool1'], 64, 3, pad=1)
    print "uconv1_2: {}".format(net['uconv1_2'].output_shape[1:])
    net['uconv1_1'] = ConvLayer(net['uconv1_2'], 64, 3, pad=1)
    print "uconv1_1: {}".format(net['uconv1_1'].output_shape[1:])
    net['output_encoder'] = ConvLayer(net['uconv1_1'], 3, 1, pad=0, nonlinearity=tanh)
    print "output_encoder: {}".format(net['output_encoder'].output_shape[1:])
    net['output_encoder_bias'] = BiasLayer(net['output_encoder'],
                                           b=lasagne.init.Constant(1))
    print "output_encoder_bias: {}".format(net['output_encoder_bias'].output_shape[1:])
    net['output_encoder_scaled'] = ScaleLayer(net['output_encoder_bias'],
                                              scales=lasagne.init.Constant(127.5))
    print "output_encoder_scaled: {}".format(net['output_encoder_scaled'].output_shape[1:])
    return net
def build_densenet(l_in, input_var=None, first_output=64, growth_rate=32, num_blocks=4,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    l_in : Layer instance
        The input layer to build the network on.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 4, as in the ImageNet models of
        the paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.

    References
    ----------
    .. [1] Gao Huang et al. (2016):
           Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """
    # nb_layers = [6, 12, 32, 32]  # For DenseNet-169
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # initial convolution
    network = Conv2DLayer(l_in, first_output, filter_size=7, stride=2, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'), b=None,
                          nonlinearity=None, name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    network = ScaleLayer(network, name='pre_scale')
    network = BiasLayer(network, name='pre_shift')
    network = dnn.MaxPool2DDNNLayer(network, pool_size=3, stride=2)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    for b in range(num_blocks):
        network = dense_block(network, nb_layers[b], growth_rate, dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network, dropout,
                                 name_prefix='block%d_trs' % (b + 1))

    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='post_relu')
    return network
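# Hypothetical classification head (not part of the original code): the DenseNet-121
# style builder above stops at the final ReLU, so a global pooling layer and a softmax
# classifier can be appended for end-to-end training. `num_classes` is illustrative.
def _densenet121_with_head(l_in, num_classes=1000):
    network = build_densenet(l_in, first_output=64, growth_rate=32, num_blocks=4)
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network, num_classes, nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1), name='output')
    return network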