def iniatial_block(inputs, name_scope='iniatial_block'):
    '''
    The initial block of ENet has two branches: a convolution branch and a max-pool branch.
    The conv branch produces 13 feature maps, while the max-pool branch keeps the 3 RGB
    channels. Both outputs are concatenated to give a 16-channel result.
    :param inputs(Tensor): A 4D tensor of shape [batch_size, channels, height, width]
    :return net_concatenated(Tensor): a 4D tensor with 16 channels and halved spatial resolution
    '''
    # Convolutional branch
    with scope(name_scope):
        net_conv = conv(inputs, 13, 3, stride=2, padding=1)
        net_conv = bn(net_conv)
        net_conv = fluid.layers.prelu(net_conv, 'channel')

        # Max pool branch
        net_pool = max_pool(inputs, [2, 2], stride=2, padding='SAME')

        # Concatenate the two branches along the channel axis (branch order does not matter).
        net_concatenated = fluid.layers.concat([net_conv, net_pool], axis=1)
    return net_concatenated
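# Usage sketch (illustrative only, not part of the network definition): the initial
# block halves the spatial resolution and outputs 16 channels (13 from the conv branch
# plus the 3 pooled RGB channels). The input name and shape below are assumptions for
# the example; `conv`, `bn`, `max_pool` and `scope` are the helper wrappers assumed to
# be defined elsewhere in this module.
def _example_initial_block():
    image = fluid.layers.data(
        name='image', shape=[3, 512, 256], dtype='float32')  # NCHW, RGB input
    net = iniatial_block(image)  # -> [batch, 16, 256, 128]
    return net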
def bottleneck(inputs,
               output_depth,
               filter_size,
               regularizer_prob,
               projection_ratio=4,
               type=REGULAR,
               seed=0,
               output_shape=None,
               dilation_rate=None,
               decoder=False,
               name_scope='bottleneck'):
    # Calculate the depth reduction based on the projection ratio used in the 1x1 convolutions.
    reduced_depth = int(inputs.shape[1] / projection_ratio)

    # DOWNSAMPLING BOTTLENECK
    if type == DOWNSAMPLING:
        #=============MAIN BRANCH=============
        # Downsample the main branch (a strided 3x3 convolution is used here in place of a max pool).
        with scope('down_sample'):
            inputs_shape = inputs.shape
            with scope('main_max_pool'):
                net_main = fluid.layers.conv2d(
                    inputs,
                    inputs_shape[1],
                    filter_size=3,
                    stride=2,
                    padding='SAME')

            # First get the difference in depth to pad, then zero-pad the channel dimension only.
            depth_to_pad = abs(inputs_shape[1] - output_depth)
            paddings = [0, 0, 0, depth_to_pad, 0, 0, 0, 0]
            with scope('main_padding'):
                net_main = fluid.layers.pad(net_main, paddings=paddings)

            with scope('block1'):
                net = conv(inputs, reduced_depth, [2, 2], stride=2, padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            with scope('block2'):
                net = conv(net, reduced_depth, [filter_size, filter_size], padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            with scope('block3'):
                net = conv(net, output_depth, [1, 1], padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Regularizer
            net = fluid.layers.dropout(net, regularizer_prob, seed=seed)

            # Finally, combine the two branches via an element-wise addition
            net = fluid.layers.elementwise_add(net, net_main)
            net = prelu(net, decoder=decoder)

        return net, inputs_shape

    # DILATED CONVOLUTION BOTTLENECK
    # Same as a regular bottleneck except that block2 uses a dilated convolution.
    elif type == DILATED:
        # Check that a dilation rate is given
        if not dilation_rate:
            raise ValueError('Dilation rate is not given.')

        with scope('dilated'):
            # Save the main branch for addition later
            net_main = inputs

            # First projection with 1x1 kernel (dimensionality reduction)
            with scope('block1'):
                net = conv(inputs, reduced_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Second conv block --- apply the dilated convolution here
            with scope('block2'):
                net = conv(net, reduced_depth, filter_size, padding='SAME', dilation=dilation_rate)
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Final projection with 1x1 kernel (expansion)
            with scope('block3'):
                net = conv(net, output_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Regularizer
            net = fluid.layers.dropout(net, regularizer_prob, seed=seed)
            net = prelu(net, decoder=decoder)

            # Add the main branch
            net = fluid.layers.elementwise_add(net_main, net)
            net = prelu(net, decoder=decoder)

        return net

    # ASYMMETRIC CONVOLUTION BOTTLENECK
    # Same as a regular bottleneck except that the [5, 5] kernel is decomposed into
    # a [5, 1] convolution followed by a [1, 5] convolution.
    elif type == ASYMMETRIC:
        with scope('asymmetric'):
            # Save the main branch for addition later
            net_main = inputs

            # First projection with 1x1 kernel (dimensionality reduction)
            with scope('block1'):
                net = conv(inputs, reduced_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Second conv block --- apply the asymmetric convolutions here
            with scope('block2'):
                with scope('asymmetric_conv2a'):
                    net = conv(net, reduced_depth, [filter_size, 1], padding='same')
                with scope('asymmetric_conv2b'):
                    net = conv(net, reduced_depth, [1, filter_size], padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Final projection with 1x1 kernel
            with scope('block3'):
                net = conv(net, output_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Regularizer
            net = fluid.layers.dropout(net, regularizer_prob, seed=seed)
            net = prelu(net, decoder=decoder)

            # Add the main branch
            net = fluid.layers.elementwise_add(net_main, net)
            net = prelu(net, decoder=decoder)

        return net

    # UPSAMPLING BOTTLENECK
    # Same as a regular bottleneck except that the middle convolution becomes transposed.
    elif type == UPSAMPLING:
        # Check that the output shape is given
        if output_shape is None:
            raise ValueError('Output shape is not given.')

        #=======MAIN BRANCH=======
        # The main branch is upsampled so that its output shape matches the encoder layer
        # that was downsampled at the same stage. That layer was padded in depth before
        # being downsampled, so the depth is reduced first; following the paper, a 1x1
        # convolution replaces the padding step for this purpose.
        with scope('upsampling'):
            with scope('unpool'):
                net_unpool = conv(inputs, output_depth, [1, 1])
                net_unpool = bn(net_unpool)
                net_unpool = fluid.layers.resize_bilinear(
                    net_unpool, out_shape=output_shape[2:])

            # First 1x1 projection to reduce depth
            with scope('block1'):
                net = conv(inputs, reduced_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            with scope('block2'):
                net = deconv(net, reduced_depth, filter_size=filter_size, stride=2, padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Final projection with 1x1 kernel
            with scope('block3'):
                net = conv(net, output_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Regularizer
            net = fluid.layers.dropout(net, regularizer_prob, seed=seed)
            net = prelu(net, decoder=decoder)

            # Finally, add the unpooling branch and the sub branch together
            net = fluid.layers.elementwise_add(net, net_unpool)
            net = prelu(net, decoder=decoder)

        return net

    # REGULAR BOTTLENECK
    else:
        with scope('regular'):
            # Save the main branch for addition later
            net_main = inputs

            # First projection with 1x1 kernel
            with scope('block1'):
                net = conv(inputs, reduced_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Second conv block
            with scope('block2'):
                net = conv(net, reduced_depth, [filter_size, filter_size], padding='same')
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Final projection with 1x1 kernel
            with scope('block3'):
                net = conv(net, output_depth, [1, 1])
                net = bn(net)
                net = prelu(net, decoder=decoder)

            # Regularizer
            net = fluid.layers.dropout(net, regularizer_prob, seed=seed)
            net = prelu(net, decoder=decoder)

            # Add the main branch
            net = fluid.layers.elementwise_add(net_main, net)
            net = prelu(net, decoder=decoder)

        return net
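# Usage sketch (illustrative only, not part of the network definition): how the
# bottleneck variants are typically chained for one ENet encoder stage plus the
# matching decoder block. The depths, dropout probability and dilation rate loosely
# follow the ENet paper; the function below and its argument values are assumptions
# for the example, not a prescribed configuration.
def _example_bottleneck_stage(net):
    # The downsampling bottleneck also returns the pre-downsampling shape, which the
    # matching upsampling bottleneck later needs as its output_shape.
    net, stage_input_shape = bottleneck(
        net, output_depth=128, filter_size=3, regularizer_prob=0.1,
        type=DOWNSAMPLING)
    net = bottleneck(net, 128, 3, 0.1, type=REGULAR)
    net = bottleneck(net, 128, 3, 0.1, type=DILATED, dilation_rate=2)
    net = bottleneck(net, 128, 5, 0.1, type=ASYMMETRIC)
    # Decoder side: upsample back to the saved shape and depth.
    net = bottleneck(net, 64, 3, 0.1, type=UPSAMPLING,
                     output_shape=stage_input_shape, decoder=True)
    return net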