def add_layer(incoming, num_channels, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)

    # Bottleneck layer to reduce number of input channels to 4 times the
    # number of output channels
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer, num_filters=4 * num_channels, filter_size=(1, 1),
                        stride=(1, 1), W=HeNormal(gain='relu'), b=None,
                        flip_filters=False, nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)

    # Convolutional layer (using padding to keep same dimensions)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)
    layer = Conv2DLayer(layer, num_filters=num_channels, filter_size=(3, 3),
                        stride=(1, 1), W=HeNormal(gain='relu'), b=None,
                        pad='same', flip_filters=False, nonlinearity=None)
    layer = BatchNormLayer(layer, beta=None, gamma=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)

    # Concatenate the input filters with the new filters
    layer = ConcatLayer([incoming, layer], axis=1)

    return layer
def residual_block(l, transition=False, first=False, filters=16):
    if transition:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        bn_pre_relu = l
    else:
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    conv_1 = NonlinearityLayer(BatchNormLayer(
        ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(3, 3),
                  stride=first_stride, nonlinearity=None, pad='same', W=he_norm)),
        nonlinearity=rectify)

    # dropout = DropoutLayer(conv_1, p=0.3)

    conv_2 = ConvLayer(conv_1, num_filters=filters, filter_size=(3, 3),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    # add shortcut connections
    if transition:
        # projection shortcut, as option B in paper
        projection = ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same', b=None)
    elif conv_2.output_shape == l.output_shape:
        projection = l
    else:
        projection = ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(1, 1),
                               stride=(1, 1), nonlinearity=None, pad='same', b=None)

    return ElemwiseSumLayer([conv_2, projection])
def conv_bn_rectify(net, num_filters):
    net = ConvLayer(net, int(num_filters), 3, W=init.Normal(), pad=1,
                    nonlinearity=None)
    net = BatchNormLayer(net, epsilon=1e-3)
    net = ll.NonlinearityLayer(net)
    return net
def bn_relu_conv(network, channels, filter_size, stride, dropout, name_prefix):
    network = BatchNormLayer(network, name=name_prefix + '_bn')
    network = NonlinearityLayer(network, nonlinearity=rectify,
                                name=name_prefix + '_relu')
    network = Conv2DLayer(network, channels, filter_size, stride=stride,
                          pad='same', W=lasagne.init.HeNormal(gain='relu'),
                          b=None, nonlinearity=None,
                          name=name_prefix + '_conv')
    if dropout:
        network = DropoutLayer(network, dropout)
    return network
def conv_bn_rectify(net, num_filters):
    net = layers.Conv2DVarDropOutARD(net, int(num_filters), 3, W=init.Normal(),
                                     pad=1, nonlinearity=nl.linear)
    net = BatchNormLayer(net, epsilon=1e-3)
    net = ll.NonlinearityLayer(net)
    return net
def dense_block(network, transition=False, first=False, filters=16):
    if transition:
        network = NonlinearityLayer(BatchNormLayer(network), nonlinearity=rectify)
        network = ConvLayer(network, network.output_shape[1], 1, pad='same',
                            W=he_norm, b=None, nonlinearity=None)
        network = Pool2DLayer(network, 2, mode='average_inc_pad')

    network = NonlinearityLayer(BatchNormLayer(network), nonlinearity=rectify)
    conv = ConvLayer(network, filters, 3, pad='same',
                     W=he_norm, b=None, nonlinearity=None)

    return ConcatLayer([network, conv], axis=1)
def model(num_classes=101):
    l_in = InputLayer(shape=(None, 3, 224, 224))

    l = NonlinearityLayer(BatchNormLayer(
        ConvLayer(l_in, num_filters=64, filter_size=(7, 7), stride=(2, 2),
                  nonlinearity=None, pad='same', W=he_norm)),
        nonlinearity=rectify)
    l = MaxPool2DLayer(l, 3, stride=2, pad='same')

    l = residual_block(l, filters=256, first=True)
    for _ in range(1, 3):
        l = residual_block(l, filters=256)

    l = residual_block(l, filters=512, transition=True)
    for _ in range(1, 4):
        l = residual_block(l, filters=512)

    l = residual_block(l, filters=1024, transition=True)
    for _ in range(1, 23):
        l = residual_block(l, filters=1024)

    l = residual_block(l, filters=2048, transition=True)
    for _ in range(1, 3):
        l = residual_block(l, filters=2048)

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    avg_pool = GlobalPoolLayer(bn_post_relu)

    return DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(),
                      nonlinearity=softmax)  # lasagne.init.HeNormal(gain=1)
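# Hedged usage sketch (not part of the original code): one way the `model`
# builder above could be compiled into a prediction function with the standard
# Lasagne/Theano API. The names `output_layer` and `predict_fn` are
# illustrative assumptions.
import theano
import lasagne

output_layer = model(num_classes=101)
# the InputLayer created inside `model` carries its own symbolic input variable
input_var = lasagne.layers.get_all_layers(output_layer)[0].input_var
# deterministic=True disables any stochastic layers at inference time
probs = lasagne.layers.get_output(output_layer, deterministic=True)
predict_fn = theano.function([input_var], probs)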
def transition(network, dropout, name_prefix):
    # transition layer: here a strided 3x3 convolution replaces the usual
    # 1x1 convolution followed by avg-pooling (the pooling is kept below
    # as a comment)
    network = affine_relu_conv(network, channels=network.output_shape[1],
                               filter_size=3, stride=2, dropout=dropout,
                               name_prefix=name_prefix)
    # network = Pool2DLayer(network, 2, mode='average_inc_pad',
    #                       name=name_prefix + '_pool')
    network = BatchNormLayer(network, name=name_prefix + '_bn',
                             beta=None, gamma=None)
    return network
def dense_block(network, num_layers, growth_rate, dropout, name_prefix):
    # concatenated 3x3 convolutions
    for n in range(num_layers):
        conv = affine_relu_conv(network, channels=growth_rate,
                                filter_size=3, dropout=dropout,
                                name_prefix=name_prefix + '_l%02d' % (n + 1))
        conv = BatchNormLayer(conv, name=name_prefix + '_l%02dbn' % (n + 1),
                              beta=None, gamma=None)
        network = ConcatLayer([network, conv], axis=1,
                              name=name_prefix + '_l%02d_join' % (n + 1))
    return network
def net_vgglike(k, input_shape, nclass):
    input_x, target_y, Winit = T.tensor4("input"), T.vector(
        "target", dtype='int32'), init.Normal()

    net = ll.InputLayer(input_shape, input_x)
    net = conv_bn_rectify(net, 64 * k)
    net = ll.DropoutLayer(net, 0.3)
    net = conv_bn_rectify(net, 64 * k)
    net = MaxPool2DLayer(net, 2, 2)

    net = conv_bn_rectify(net, 128 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 128 * k)
    net = MaxPool2DLayer(net, 2, 2)

    net = conv_bn_rectify(net, 256 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 256 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 256 * k)
    net = MaxPool2DLayer(net, 2, 2)

    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = MaxPool2DLayer(net, 2, 2)

    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = ll.DropoutLayer(net, 0.4)
    net = conv_bn_rectify(net, 512 * k)
    net = MaxPool2DLayer(net, 2, 2)

    net = ll.DenseLayer(net, int(512 * k), W=init.Normal(),
                        nonlinearity=nl.rectify)
    net = BatchNormLayer(net, epsilon=1e-3)
    net = ll.NonlinearityLayer(net)
    net = ll.DropoutLayer(net, 0.5)
    net = ll.DenseLayer(net, nclass, W=init.Normal(), nonlinearity=nl.softmax)

    return net, input_x, target_y, k
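# Hedged usage sketch (not part of the original code): wiring the network,
# input and target returned by `net_vgglike` into a cross-entropy training
# function. `train_fn`, the learning rate and the momentum value are
# illustrative assumptions, not settings from the original experiments.
import theano
import lasagne

net, input_x, target_y, k = net_vgglike(1.0, (None, 3, 32, 32), nclass=10)
predictions = lasagne.layers.get_output(net)   # stochastic pass (dropout on)
loss = lasagne.objectives.categorical_crossentropy(predictions, target_y).mean()
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params,
                                            learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_x, target_y], loss, updates=updates)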
def add_transition(incoming, num_filters, dropout):
    layer = ScaleLayer(incoming)
    layer = BiasLayer(layer)
    layer = NonlinearityLayer(layer, nonlinearity=rectify)

    # Reduce the number of filters
    layer = Conv2DLayer(layer, num_filters=num_filters, filter_size=(1, 1),
                        stride=(1, 1), W=HeNormal(gain='relu'), b=None,
                        flip_filters=False, nonlinearity=None)
    if dropout > 0:
        layer = DropoutLayer(layer, p=dropout)

    # Pooling layer to reduce the last two dimensions by half
    layer = Pool2DLayer(layer, pool_size=(2, 2), stride=(2, 2),
                        mode='average_exc_pad')
    layer = BatchNormLayer(layer, beta=None, gamma=None)

    return layer
def build_densenet(l_in, input_var=None, first_output=64, growth_rate=32,
                   num_blocks=4, dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    l_in : Layer instance
        The input layer (e.g. an ``InputLayer`` of shape
        ``(batchsize, channels, rows, cols)``) to build the network on.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 4, as in the ImageNet models of
        the paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the last feature layer (before global
        pooling and classification).

    References
    ----------
    .. [1] Gao Huang et al. (2016): Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """
    # number of layers per dense block
    # nb_layers = [6, 12, 32, 32]  # For DenseNet-169
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # initial convolution
    network = Conv2DLayer(l_in, first_output, filter_size=7, stride=2,
                          pad='same', W=lasagne.init.HeNormal(gain='relu'),
                          b=None, nonlinearity=None, name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    network = ScaleLayer(network, name='pre_scale')
    network = BiasLayer(network, name='pre_shift')
    network = dnn.MaxPool2DDNNLayer(network, pool_size=3, stride=2)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    for b in range(num_blocks):
        network = dense_block(network, nb_layers[b], growth_rate, dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network, dropout,
                                 name_prefix='block%d_trs' % (b + 1))

    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='post_relu')

    return network
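# Hedged usage sketch (not part of the original code): this variant expects an
# existing input layer and returns the feature network before global pooling,
# so a classifier head would be attached by the caller. The names `features`
# and `head` are illustrative assumptions.
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, GlobalPoolLayer, DenseLayer
from lasagne.nonlinearities import softmax

input_var = T.tensor4('inputs')
l_in = InputLayer((None, 3, 224, 224), input_var)
features = build_densenet(l_in, first_output=64, growth_rate=32,
                          num_blocks=4, dropout=0)
head = DenseLayer(GlobalPoolLayer(features), num_units=1000,
                  W=lasagne.init.HeNormal(gain=1), nonlinearity=softmax)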
def residual_bottleneck_block(l, transition=False, first=False, filters=16):
    if transition:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    if first:
        bn_pre_relu = l
    else:
        bn_pre_conv = BatchNormLayer(l)
        bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

    # integer division so num_filters stays an int under Python 3
    bottleneck_filters = filters // 4

    conv_1 = NonlinearityLayer(BatchNormLayer(
        ConvLayer(bn_pre_relu, num_filters=bottleneck_filters, filter_size=(1, 1),
                  stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)),
        nonlinearity=rectify)

    conv_2 = NonlinearityLayer(BatchNormLayer(
        ConvLayer(conv_1, num_filters=bottleneck_filters, filter_size=(3, 3),
                  stride=first_stride, nonlinearity=None, pad='same', W=he_norm)),
        nonlinearity=rectify)

    conv_3 = ConvLayer(conv_2, num_filters=filters, filter_size=(1, 1),
                       stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)

    if transition:
        projection = ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None, pad='same', b=None)
    elif first:
        projection = ConvLayer(bn_pre_relu, num_filters=filters, filter_size=(1, 1),
                               stride=(1, 1), nonlinearity=None, pad='same', b=None)
    else:
        projection = l

    return ElemwiseSumLayer([conv_3, projection])
def ResNet_FullPre_Wide(input_var=None, nout=10, n=3, k=2, dropoutrate=0):
    def gelu(x):
        return 0.5 * x * (
            1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
    f = gelu

    '''
    Adapted from https://gist.github.com/FlorianMuellerklein/3d9ba175038a3f2e7de3794fa303f1ee
    which was tweaked to be consistent with 'Identity Mappings in Deep Residual Networks',
    Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
    And 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016
    (http://arxiv.org/pdf/1605.07146v1.pdf)
    '''

    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # create a residual learning building block with two stacked 3x3 convlayers and dropout
    def residual_block(l, first=False, increase_dim=False, filters=16):
        if increase_dim:
            first_stride = (2, 2)
        else:
            first_stride = (1, 1)

        conv_1 = ConvLayer(l, num_filters=filters, filter_size=(3, 3),
                           stride=first_stride, nonlinearity=f, pad='same',
                           W=HeNormal(gain='relu'))

        if dropoutrate > 0:   # with dropout
            dropout = DropoutLayer(conv_1, p=dropoutrate)

            # contains the last weight portion, step 6
            conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3),
                               stride=(1, 1), nonlinearity=f, pad='same',
                               W=HeNormal(gain='relu'))
        else:   # without dropout
            conv_2 = ConvLayer(conv_1, num_filters=filters, filter_size=(3, 3),
                               stride=(1, 1), nonlinearity=f, pad='same',
                               W=HeNormal(gain='relu'))

        stack_3 = BatchNormLayer(conv_2)

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                                   stride=(2, 2), nonlinearity=None,
                                   pad='same', b=None)
            block = ElemwiseSumLayer([stack_3, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                                   stride=(1, 1), nonlinearity=None,
                                   pad='same', b=None)
            block = ElemwiseSumLayer([stack_3, projection])
        else:
            block = ElemwiseSumLayer([stack_3, l])

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

    # we're normalizing the input as the net sees fit, and we normalize the output
    l = batch_norm(
        ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                  stride=(1, 1), nonlinearity=f, pad='same',
                  W=HeNormal(gain='relu')))
    l = BatchNormLayer(l)

    # first stack of residual blocks
    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    # second stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[2])

    # third stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, f)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool, num_units=nout,
                         W=HeNormal(), nonlinearity=softmax)

    return network
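# Hedged usage sketch (not part of the original code): compiling the wide
# pre-activation ResNet above for training with Adam. The learning rate,
# dropout rate and the names `network`, `train_fn` are illustrative
# assumptions.
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = ResNet_FullPre_Wide(input_var, nout=10, n=3, k=2, dropoutrate=0.3)

train_out = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(train_out, target_var).mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], loss, updates=updates)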
def build_densenet(
        input_var,
        input_shape=(None, 3, 224, 224),
        num_filters_init=64,
        growth_rate=32,
        dropout=0.2,
        num_classes=1000,
        stages=[6, 12, 24, 16]):

    if input_shape[2] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[2] must be a multiple of {}.".format(
            2 ** len(stages)))

    if input_shape[3] % (2 ** len(stages)) != 0:
        raise ValueError("input_shape[3] must be a multiple of {}.".format(
            2 ** len(stages)))

    # Input should be (BATCH_SIZE, NUM_CHANNELS, WIDTH, HEIGHT)
    # NUM_CHANNELS is usually 3 (R,G,B) and for the ImageNet example the
    # width and height are 224
    network = InputLayer(input_shape, input_var)

    # Apply 2D convolutions with a 7x7 filter (pad by 3 on each side)
    # Because of the 2x2 stride the shape of the last two dimensions will be
    # half the size of the input (112x112)
    network = Conv2DLayer(network,
                          num_filters=num_filters_init,
                          filter_size=(7, 7),
                          stride=(2, 2),
                          pad=(3, 3),
                          W=HeNormal(gain='relu'),
                          b=None,
                          flip_filters=False,
                          nonlinearity=None)

    # Batch normalize
    network = BatchNormLayer(network, beta=None, gamma=None)

    # If dropout is enabled, apply after every convolutional and dense layer
    if dropout > 0:
        network = DropoutLayer(network, p=dropout)

    # Apply ReLU
    network = NonlinearityLayer(network, nonlinearity=rectify)

    # Keep the maximum value of a 3x3 pool with a 2x2 stride
    # This operation again divides the size of the last two dimensions by two (56x56)
    network = MaxPool2DLayer(network,
                             pool_size=(3, 3),
                             stride=(2, 2),
                             pad=(1, 1))

    # Add dense blocks
    for i, num_layers in enumerate(stages):
        # Except for the first block, we add a transition layer before the
        # dense block that halves the number of filters, width and height
        if i > 0:
            network = add_transition(network,
                                     math.floor(network.output_shape[1] / 2),
                                     dropout)
        network = build_block(network, num_layers, growth_rate, dropout)

    # Apply global pooling and add a fully connected layer with softmax function
    network = ScaleLayer(network)
    network = BiasLayer(network)
    network = NonlinearityLayer(network, nonlinearity=rectify)
    network = GlobalPoolLayer(network)
    network = DenseLayer(network,
                         num_units=num_classes,
                         W=HeNormal(gain=1),
                         nonlinearity=softmax)

    return network
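# Hedged usage sketch (not part of the original code): the spatial input sizes
# must be divisible by 2**len(stages) (16 with the default four stages), so
# the ImageNet-style 224x224 shape below passes the checks; the names are
# illustrative assumptions.
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network = build_densenet(input_var, input_shape=(None, 3, 224, 224),
                         num_filters_init=64, growth_rate=32,
                         dropout=0.2, num_classes=1000)
# the classifier output shape is (batchsize, num_classes)
print(lasagne.layers.get_output_shape(network))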
def build_densenet(input_shape=(None, 3, 32, 32), input_var=None, classes=10,
                   depth=40, first_output=16, growth_rate=12, num_blocks=3,
                   dropout=0):
    """
    Creates a DenseNet model in Lasagne.

    Parameters
    ----------
    input_shape : tuple
        The shape of the input layer, as ``(batchsize, channels, rows, cols)``.
        Any entry except ``channels`` can be ``None`` to indicate free size.
    input_var : Theano expression or None
        Symbolic input variable. Will be created automatically if not given.
    classes : int
        The number of classes of the softmax output.
    depth : int
        Depth of the network. Must be ``num_blocks * n + 1`` for some ``n``.
        (Parameterizing by depth rather than n makes it easier to follow the
        paper.)
    first_output : int
        Number of channels of initial convolution before entering the first
        dense block, should be of comparable size to `growth_rate`.
    growth_rate : int
        Number of feature maps added per layer.
    num_blocks : int
        Number of dense blocks (defaults to 3, as in the original paper).
    dropout : float
        The dropout rate. Set to zero (the default) to disable dropout.

    Returns
    -------
    network : Layer instance
        Lasagne Layer instance for the output layer.

    References
    ----------
    .. [1] Gao Huang et al. (2016): Densely Connected Convolutional Networks.
           https://arxiv.org/abs/1608.06993
    """
    if (depth - 1) % num_blocks != 0:
        raise ValueError("depth must be num_blocks * n + 1 for some n")

    # input and initial convolution
    network = InputLayer(input_shape, input_var, name='input')
    network = Conv2DLayer(network, first_output, 3, pad='same',
                          W=lasagne.init.HeNormal(gain='relu'),
                          b=None, nonlinearity=None, name='pre_conv')
    network = BatchNormLayer(network, name='pre_bn', beta=None, gamma=None)
    # note: The authors' implementation does *not* have a dropout after the
    #       initial convolution. This was missing in the paper, but important.
    # if dropout:
    #     network = DropoutLayer(network, dropout)

    # dense blocks with transitions in between
    n = (depth - 1) // num_blocks
    for b in range(num_blocks):
        network = dense_block(network, n - 1, growth_rate, dropout,
                              name_prefix='block%d' % (b + 1))
        if b < num_blocks - 1:
            network = transition(network, dropout,
                                 name_prefix='block%d_trs' % (b + 1))

    # post processing until prediction
    network = ScaleLayer(network, name='post_scale')
    network = BiasLayer(network, name='post_shift')
    network = NonlinearityLayer(network, nonlinearity=rectify, name='post_relu')
    network = GlobalPoolLayer(network, name='post_pool')
    network = DenseLayer(network, classes, nonlinearity=softmax,
                         W=lasagne.init.HeNormal(gain=1), name='output')

    return network
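# Hedged usage sketch (not part of the original code): building the CIFAR
# DenseNet above and inspecting its size. Depth 40 with growth rate 12 is one
# plausible configuration (it satisfies the depth check), and `count_params`
# is from the standard Lasagne API.
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network = build_densenet(input_shape=(None, 3, 32, 32), input_var=input_var,
                         classes=10, depth=40, growth_rate=12, dropout=0)
print("trainable parameters:",
      lasagne.layers.count_params(network, trainable=True))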
def model(shape, n=18, num_filters=16, num_classes=10, width=1, block='normal'):
    if block == "normal":
        from residual_block import residual_block
        n_filters = {
            0: num_filters,
            1: num_filters * width,
            2: num_filters * 2 * width,
            3: num_filters * 4 * width
        }
    elif block == "dense":
        from residual_block import dense_block as residual_block
        growth_rate = 12
        n_filters = {
            0: num_filters,
            1: growth_rate,
            2: growth_rate,
            3: growth_rate
        }
    elif block == "dense_fast":
        from residual_block import dense_fast_block as residual_block
        growth_rate = 12
        n_filters = {
            0: num_filters,
            1: growth_rate,
            2: growth_rate,
            3: growth_rate
        }
    else:
        from residual_block import residual_bottleneck_block as residual_block
        n_filters = {
            0: num_filters,
            1: num_filters * 4,
            2: num_filters * 8,
            3: num_filters * 16
        }

    l_in = InputLayer(shape=(None, shape[1], shape[2], shape[3]))

    l = NonlinearityLayer(BatchNormLayer(
        ConvLayer(l_in, num_filters=n_filters[0], filter_size=(3, 3),
                  stride=(1, 1), nonlinearity=None, pad='same', W=he_norm)),
        nonlinearity=rectify)

    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    l = residual_block(l, transition=True, filters=n_filters[2])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[2])

    l = residual_block(l, transition=True, filters=n_filters[3])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    avg_pool = GlobalPoolLayer(bn_post_relu)

    return DenseLayer(avg_pool, num_units=num_classes, W=HeNormal(),
                      nonlinearity=softmax)  # lasagne.init.HeNormal(gain=1)
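# Hedged usage sketch (not part of the original code): the `block` argument
# selects which building block is imported from `residual_block.py`. Below,
# a CIFAR-10 sized wide ResNet and a bottleneck ResNet are instantiated;
# the shapes and hyperparameters are illustrative assumptions.
wide_resnet = model((None, 3, 32, 32), n=4, num_filters=16, num_classes=10,
                    width=2, block='normal')
bottleneck_resnet = model((None, 3, 32, 32), n=3, num_filters=16,
                          num_classes=10, block='bottleneck')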
def residual_block(l, first=False, increase_dim=False, filters=16):
    if increase_dim:
        first_stride = (2, 2)
    else:
        first_stride = (1, 1)

    conv_1 = ConvLayer(l, num_filters=filters, filter_size=(3, 3),
                       stride=first_stride, nonlinearity=f, pad='same',
                       W=HeNormal(gain='relu'))

    if dropoutrate > 0:   # with dropout
        dropout = DropoutLayer(conv_1, p=dropoutrate)

        # contains the last weight portion, step 6
        conv_2 = ConvLayer(dropout, num_filters=filters, filter_size=(3, 3),
                           stride=(1, 1), nonlinearity=f, pad='same',
                           W=HeNormal(gain='relu'))
    else:   # without dropout
        conv_2 = ConvLayer(conv_1, num_filters=filters, filter_size=(3, 3),
                           stride=(1, 1), nonlinearity=f, pad='same',
                           W=HeNormal(gain='relu'))

    stack_3 = BatchNormLayer(conv_2)

    # add shortcut connections
    if increase_dim:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(2, 2), nonlinearity=None,
                               pad='same', b=None)
        block = ElemwiseSumLayer([stack_3, projection])
    elif first:
        # projection shortcut, as option B in paper
        projection = ConvLayer(l, num_filters=filters, filter_size=(1, 1),
                               stride=(1, 1), nonlinearity=None,
                               pad='same', b=None)
        block = ElemwiseSumLayer([stack_3, projection])
    else:
        block = ElemwiseSumLayer([stack_3, l])

    return block