def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
    """Build one MobileNet unit: depthwise convolution, then 1x1 convolution.

    `kernel_size`, `padding` and `width_multiplier` are read from an
    enclosing scope; none of them is assigned inside this function.
    The original description lists "BN + relu" after each convolution;
    those are not configured here (presumably supplied by an arg_scope set
    up by the caller -- TODO confirm).

    Args:
        net: input handed to the depthwise convolution.
        num_out_channels: channel count of the 1x1 convolution before
            scaling; it is multiplied by `width_multiplier` and converted
            with `int`.
        stride: two-element stride of the depthwise convolution.
        scope: optional variable scope name; 'block' is the default name.

    Returns:
        The output of the 'conv_pw' 1x1 convolution.
    """
    with tf.variable_scope(scope, 'block', [net]):
        pointwise_depth = int(num_out_channels * width_multiplier)
        depthwise_args = dict(depth_multiplier=1,
                              stride=stride,
                              scope='conv_dw')
        if stride[0] != 1 or stride[1] != 1:
            # Mimic CAFFE padding if stride > 1: pad explicitly first, then
            # run the depthwise convolution with 'VALID' padding.
            net = custom_layers.pad2d(net, pad=padding)
            depthwise_args['padding'] = 'VALID'
        net = custom_layers.depthwise_convolution2d(net,
                                                    kernel_size,
                                                    **depthwise_args)
        # Pointwise convolution.
        return slim.conv2d(net, pointwise_depth, [1, 1], scope='conv_pw')
 def max_avg_pool2d(net, stride=[2, 2], scope=None):
     """Add a 3x3 max pooling and a 3x3 average pooling of the same input.

     Args:
         net: input to pool.
         stride: stride handed to both pooling calls.
         scope: optional variable scope name; 'max_avg_pool' is the default
             name.

     Returns:
         The sum of the max-pooled and the average-pooled outputs, both
         computed from the same explicitly padded input.
     """
     with tf.variable_scope(scope, 'max_avg_pool', [net]):
         window = [3, 3]
         # Additional Caffe padding: pad by [1, 1] up front so that both
         # pooling calls can use 'VALID' padding.
         padded = custom_layers.pad2d(net, pad=[1, 1])
         # Max + Avg pooling, created in that order.
         pooled_max, pooled_avg = [
             pool(padded, window, stride, padding='VALID')
             for pool in (slim.max_pool2d, slim.avg_pool2d)
         ]
         return pooled_max + pooled_avg
 def mobilenet_block(net, num_out_channels, stride=[1, 1],
                     leaders=False, scope=None):
     """Build one MobileNet unit, optionally with a "leaders" depthwise step.

     `width_multiplier` is read from an enclosing scope; it is not assigned
     inside this function. The original description lists "BN + relu" after
     each convolution; apart from the explicit `activation_fn` on the
     leaders call these are not configured here (presumably supplied by an
     arg_scope set up by the caller -- TODO confirm).

     Args:
         net: input handed to the depthwise step.
         num_out_channels: channel count of the 1x1 convolution before
             scaling; it is multiplied by `width_multiplier` and converted
             with `int`.
         stride: two-element stride of the depthwise step.
         leaders: when true and the stride is not [1, 1], use
             `depthwise_leaders_convolution2d` instead of the padded
             depthwise convolution. Ignored when the stride is [1, 1].
         scope: optional variable scope name; 'block' is the default name.

     Returns:
         The output of the 'conv_pw' 1x1 convolution.
     """
     with tf.variable_scope(scope, 'block', [net]):
         pointwise_depth = int(num_out_channels * width_multiplier)
         dw_kernel = [3, 3]
         unit_stride = stride[0] == 1 and stride[1] == 1
         if unit_stride:
             # Classic depthwise convolution with stride=1
             net = custom_layers.depthwise_convolution2d(
                 net,
                 dw_kernel,
                 depth_multiplier=1,
                 stride=stride,
                 scope='conv_dw')
         elif leaders:
             # Special Depthwise Leader convolution when stride > 1.
             # No explicit pre-padding here: the call uses padding='SAME'.
             net = custom_layers.depthwise_leaders_convolution2d(
                 net, dw_kernel,
                 padding='SAME', stride=stride,
                 rates=[1, 2, 3], pooling_sizes=[5, 3, 1],
                 pooling_type='AVG', activation_fn=tf.nn.relu,
                 scope='conv_lead_dw')
         else:
             # Mimic CAFFE padding if stride > 1: pad explicitly first, then
             # run the depthwise convolution with 'VALID' padding.
             net = custom_layers.pad2d(net, pad=(1, 1))
             net = custom_layers.depthwise_convolution2d(
                 net,
                 dw_kernel,
                 padding='VALID',
                 depth_multiplier=1,
                 stride=stride,
                 scope='conv_dw')
         # Pointwise convolution.
         return slim.conv2d(net, pointwise_depth, [1, 1], scope='conv_pw')
def mobilenets(inputs,
               num_classes=1000,
               width_multiplier=1.0,
               is_training=True,
               dropout_keep_prob=0.5,
               scope='MobileNets'):
    """Build the MobileNets classification graph.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes. The 'conv_fc15' classifier
            always has 1000 outputs; `num_classes - 1000` is then passed to
            `custom_layers.pad_logits`.
        width_multiplier: factor applied to the output channel count of
            every MobileNet block (not to 'conv1' nor to the classifier).
        is_training: whether or not the model is being trained. Not
            referenced in this function body.
        dropout_keep_prob: the probability that activations are kept in
            dropout layers. Not referenced in this function body.
        scope: Optional scope for the variables.

    Returns:
        A `(net, end_points)` pair: the output of the logits padding step
        and a dict that is returned empty.
    """
    kernel_size = [3, 3]
    padding = [(kernel_size[axis] - 1) // 2 for axis in (0, 1)]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Depthwise convolution followed by a 1x1 (pointwise) convolution.

        The pointwise channel count is `num_out_channels` scaled by
        `width_multiplier`.
        """
        with tf.variable_scope(scope, 'block', [net]):
            pointwise_depth = int(num_out_channels * width_multiplier)
            depthwise_args = dict(depth_multiplier=1,
                                  stride=stride,
                                  scope='conv_dw')
            if stride[0] != 1 or stride[1] != 1:
                # Mimic CAFFE padding if stride > 1: explicit padding, then
                # a 'VALID' depthwise convolution.
                net = custom_layers.pad2d(net, pad=padding)
                depthwise_args['padding'] = 'VALID'
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        **depthwise_args)
            # Pointwise convolution.
            return slim.conv2d(net, pointwise_depth, [1, 1], scope='conv_pw')

    with tf.variable_scope(scope, 'MobileNets', [inputs]):
        end_points = {}
        # First full convolution, explicitly padded and run as 'VALID'.
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net, 32, kernel_size,
                          stride=[2, 2], padding='VALID', scope='conv1')
        # Then, MobileNet blocks: (scope, channels, stride), applied in order.
        early_blocks = (
            ('block2', 64, [1, 1]),
            ('block3', 128, [2, 2]),
            ('block4', 128, [1, 1]),
            ('block5', 256, [2, 2]),
            ('block6', 256, [1, 1]),
            ('block7', 512, [2, 2]),
        )
        for block_scope, depth, block_stride in early_blocks:
            net = mobilenet_block(net, depth,
                                  stride=block_stride, scope=block_scope)
        # Intermediate blocks: block8 .. block12.
        for block_id in range(8, 13):
            net = mobilenet_block(net, 512, scope='block%i' % block_id)
        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer (written as a 1x1
        # convolution without activation or normalizer).
        net = custom_layers.spatial_mean(
            net, keep_dims=True, scope='spatial_mean14')
        net = slim.conv2d(
            net, 1000, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            biases_initializer=tf.zeros_initializer(),
            scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)

        # Logits padding, sized by how far num_classes is from 1000.
        extra_logits = num_classes - 1000
        net = custom_layers.pad_logits(net, pad=(extra_logits, 0))
        return net, end_points
# Example #5
# 0
def mobilenets_btree(inputs,
                     num_classes=1000,
                     kernel_size=[3, 3],
                     width_multiplier=1.0,
                     dropouts=[0.5],
                     pad_logits=True,
                     is_training=True,
                     reuse=None,
                     scope='MobileNets'):
    """MobileNets variant whose middle section is residual B-tree blocks.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of outputs of the 'conv_fc15' classifier; also
            used to compute the logits padding amount.
        kernel_size: two-element kernel size used by 'conv1' and by every
            depthwise convolution; also determines the explicit padding.
        width_multiplier: factor applied to the output channel count of
            every block (not to 'conv1' nor to the classifier).
        dropouts: not referenced in this function body.
        pad_logits: when true, `custom_layers.pad_logits` is applied to the
            classifier output with `pad=(num_classes - 1000, 0)`.
        is_training: whether or not the model is being trained. Not
            referenced in this function body.
        reuse: forwarded to the top-level `tf.variable_scope`.
        scope: Optional scope for the variables.

    Returns:
        A `(net, end_points)` pair: the (optionally padded) classifier
        output and a dict that is returned empty.
    """
    # MobileNets padding (for layers with stride > 1), from the kernel size.
    padding = [(kernel_size[axis] - 1) // 2 for axis in (0, 1)]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Depthwise convolution followed by a 1x1 (pointwise) convolution.

        The pointwise channel count is `num_out_channels` scaled by
        `width_multiplier`.
        """
        with tf.variable_scope(scope, 'block', [net]):
            pointwise_depth = int(num_out_channels * width_multiplier)
            depthwise_args = dict(depth_multiplier=1,
                                  stride=stride,
                                  scope='conv_dw')
            if stride[0] != 1 or stride[1] != 1:
                # Mimic CAFFE padding if stride > 1 => usually better
                # accuracy: explicit padding, then a 'VALID' convolution.
                net = custom_layers.pad2d(net, pad=padding)
                depthwise_args['padding'] = 'VALID'
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        **depthwise_args)
            # Pointwise convolution.
            return slim.conv2d(net, pointwise_depth, [1, 1], scope='conv_pw')

    def mobilenet_block_btree_v1(net,
                                 num_out_channels,
                                 stride=[1, 1],
                                 split=2,
                                 scope=None):
        """Depthwise convolution followed by a split 1x1 convolution.

        Not called anywhere in `mobilenets_btree`.
        """
        with tf.variable_scope(scope, 'block', [net]):
            pointwise_depth = int(num_out_channels * width_multiplier)
            # Depthwise convolution (no explicit padding in this variant).
            depthwise_out = custom_layers.depthwise_convolution2d(
                net,
                kernel_size,
                depth_multiplier=1,
                stride=stride,
                scope='conv_dw')
            # Split-pointwise convolution.
            return btree_layers.conv2d_1x1_split(
                depthwise_out,
                pointwise_depth,
                split=split,
                scope='conv_pw_split')

    def mobilenet_block_btree_v2(net,
                                 num_out_channels,
                                 stride=[1, 1],
                                 split=2,
                                 scope=None):
        """Combination of ResNets block and B-tree.

        Batch norm first, then a depthwise convolution and a split 1x1
        convolution, both without activation.
        """
        with tf.variable_scope(scope, 'block', [net]):
            # Start with Batch Norm.
            normed = custom_layers.batch_norm(net)
            # Depthwise convolution, no activation.
            depthwise_out = custom_layers.depthwise_convolution2d(
                normed,
                kernel_size,
                depth_multiplier=1,
                stride=stride,
                activation_fn=None,
                scope='conv_dw')
            # Split-pointwise convolution, no activation and no normalizer.
            pointwise_depth = int(num_out_channels * width_multiplier)
            return btree_layers.conv2d_1x1_split(
                depthwise_out,
                pointwise_depth,
                split=split,
                activation_fn=None,
                normalizer_fn=None,
                scope='conv_pw_split')

    with tf.variable_scope(scope, 'MobileNets', [inputs], reuse=reuse):
        end_points = {}
        # First full convolution, explicitly padded and run as 'VALID'.
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net, 32, kernel_size,
                          stride=[2, 2], padding='VALID', scope='conv1')
        # Then, MobileNet blocks: (scope, channels, stride), applied in order.
        early_blocks = (
            ('block2', 64, [1, 1]),
            ('block3', 128, [2, 2]),
            ('block4', 128, [1, 1]),
            ('block5', 256, [2, 2]),
            ('block6', 256, [1, 1]),
            ('block7', 512, [2, 2]),
        )
        for block_scope, depth, block_stride in early_blocks:
            net = mobilenet_block(net, depth,
                                  stride=block_stride, scope=block_scope)

        # Intermediate residual blocks. Each stage is a B-tree block that is
        # optionally followed by a channel translation:
        # (block scope format, translation scope format, delta).
        residual_stages = (
            ('block%i_a', 'ch_translate_%i_a', 64),
            ('block%i_b', 'ch_translate_%i_b', -64),
            ('block%i_c', None, None),
        )
        for i in range(8, 16):
            # NOTE(review): the first argument is this function's `scope`,
            # so 'resblock_%i' only serves as the default name when `scope`
            # is None -- confirm this is intended.
            with tf.variable_scope(scope, 'resblock_%i' % i, [net]):
                shortcut = net
                for block_fmt, shift_fmt, delta in residual_stages:
                    net = mobilenet_block_btree_v2(
                        net, 512, split=4, scope=block_fmt % i)
                    if shift_fmt is not None:
                        net = btree_layers.translate_channels(
                            net, delta=delta, scope=shift_fmt % i)
                # Residual sum of the block input and the three stages.
                net = tf.add(shortcut, net, 'residual_sum_%i' % i)
        net = custom_layers.batch_norm(net)

        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer (written as a 1x1
        # convolution without activation or normalizer).
        net = custom_layers.spatial_mean(
            net, keep_dims=True, scope='spatial_mean14')
        net = slim.conv2d(
            net, num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            biases_initializer=tf.zeros_initializer(),
            scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)

        # Logits padding: get everyone to the same number of classes.
        # NOTE(review): the classifier above already has `num_classes`
        # outputs, yet the pad amount is `num_classes - 1000` -- confirm.
        if pad_logits:
            net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
        return net, end_points