Example 1
import tensorflow as tf

import custom_layers  # project-local helper module used below

slim = tf.contrib.slim


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes.
        is_training: whether or not the model is being trained.
        dropout_keep_prob: the probability that activations are kept in the dropout
            layers during training.
        spatial_squeeze: whether or not to squeeze the spatial dimensions of the
            outputs. Useful to remove unnecessary dimensions for classification.
        scope: Optional scope for the variables.

    Returns:
        the last op containing the logits and the end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
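            # (After five 2x2 poolings a 224x224 input is 7x7 here, so this
            # VALID 7x7 convolution plays the role of the original fc6 layer.)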
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
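            # fc8 keeps the 1000 ImageNet logits; pad_logits below presumably
            # pads them out to num_classes.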
            net = slim.conv2d(net, 1000, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if spatial_squeeze:
                # net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                net = custom_layers.spatial_squeeze(net)
                net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
                end_points[sc.name + '/fc8'] = net
            return net, end_points
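
A minimal usage sketch for vgg_16 above, assuming TF 1.x and the imports at the top of the example (224x224 is the input size the docstring asks for):

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = vgg_16(images, num_classes=1000, is_training=False)
# With num_classes=1000 the pad is (0, 0), so logits keeps shape
# [batch_size, 1000] after the spatial squeeze.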
Example 2
import tensorflow as tf

import custom_layers  # project-local helper module used below

slim = tf.contrib.slim


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes.
        is_training: whether or not the model is being trained.
        dropout_keep_prob: the probability that activations are kept in the dropout
            layers during training.
        spatial_squeeze: whether or not to squeeze the spatial dimensions of the
            outputs. Useful to remove unnecessary dimensions for classification.
        scope: Optional scope for the variables.

    Returns:
        the last op containing the logits and the end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout7')
            net = slim.conv2d(net,
                              1000, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                # net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                net = custom_layers.spatial_squeeze(net)
                net = custom_layers.pad_logits(net,
                                               pad=(num_classes - 1000, 0))
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def mobilenets(inputs,
               num_classes=1000,
               width_multiplier=1.0,
               is_training=True,
               dropout_keep_prob=0.5,
               scope='MobileNets'):
    """MobileNets implementation.
    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes.
        width_multiplier: multiplies the number of output channels of every
            layer (the MobileNets width multiplier).
        is_training: whether or not the model is being trained.
        dropout_keep_prob: the probability that activations are kept in the dropout
            layers during training.
        scope: Optional scope for the variables.

    Returns:
        the last op containing the logits and the end_points dict.
    """
    kernel_size = [3, 3]
    padding = [(kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Basic MobileNet block combining:
         - depthwise conv + BN + relu
         - 1x1 conv + BN + relu
        """
        with tf.variable_scope(scope, 'block', [net]) as sc:
            num_out_channels = int(num_out_channels * width_multiplier)
            if stride[0] == 1 and stride[1] == 1:
                # Depthwise convolution with stride=1
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            else:
                # Mimic CAFFE padding if stride > 1.
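                # (TensorFlow's 'SAME' padding with stride 2 adds any odd
                # padding only on the bottom/right, while Caffe pads
                # symmetrically; explicit pad2d + 'VALID' mimics the latter.)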
                net = custom_layers.pad2d(net, pad=padding)
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            padding='VALID',
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            # Pointwise convolution.
            net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
            return net

    with tf.variable_scope(scope, 'MobileNets', [inputs]) as sc:
        end_points = {}
        # First full convolution...
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net,
                          32,
                          kernel_size,
                          stride=[2, 2],
                          padding='VALID',
                          scope='conv1')
        # net = slim.conv2d(inputs, 32, [ksize, ksize], stride=[2, 2], scope='conv1')
        # Then, MobileNet blocks!
        net = mobilenet_block(net, 64, scope='block2')
        net = mobilenet_block(net, 128, stride=[2, 2], scope='block3')
        net = mobilenet_block(net, 128, scope='block4')
        net = mobilenet_block(net, 256, stride=[2, 2], scope='block5')
        net = mobilenet_block(net, 256, scope='block6')
        net = mobilenet_block(net, 512, stride=[2, 2], scope='block7')
        # Intermediate blocks...
        for i in range(5):
            net = mobilenet_block(net, 512, scope='block%i' % (i + 8))
        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer.
        net = custom_layers.spatial_mean(net,
                                         keep_dims=True,
                                         scope='spatial_mean14')
        net = slim.conv2d(net,
                          1000, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          normalizer_params=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)
        # net = slim.fully_connected(net, 1000,  scope='fc15')

        # Logits padding: pad the 1000 ImageNet logits out to num_classes.
        net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
        return net, end_points
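
A minimal usage sketch for mobilenets above, assuming TF 1.x and the imports at the top of the example (224x224 is the resolution used in the MobileNets paper):

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, _ = mobilenets(images, num_classes=1000, width_multiplier=0.5)
# width_multiplier=0.5 halves the channel count of every block, which
# roughly quarters the weights in the pointwise convolutions.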
Example 4
import tensorflow as tf

import btree_layers   # project-local helper modules used below
import custom_layers

slim = tf.contrib.slim


def mobilenets_btree(inputs,
                     num_classes=1000,
                     kernel_size=[3, 3],
                     width_multiplier=1.0,
                     dropouts=[0.5],
                     pad_logits=True,
                     is_training=True,
                     reuse=None,
                     scope='MobileNets'):
    """MobileNets implementation.
    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        num_classes: number of predicted classes.
        kernel_size: kernel size of the depthwise convolutions.
        width_multiplier: multiplies the number of output channels of every
            layer (the MobileNets width multiplier).
        dropouts: keep probabilities for the dropout layers.
        pad_logits: whether to pad the logits up to num_classes entries.
        is_training: whether or not the model is being trained.
        reuse: whether or not to reuse the variables of this scope.
        scope: Optional scope for the variables.

    Returns:
        the last op containing the logits and the end_points dict.
    """
    # MobileNets kernel size and padding (for layers with stride > 1).
    # kernel_size = [3, 3]
    padding = [(kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2]

    def mobilenet_block(net, num_out_channels, stride=[1, 1], scope=None):
        """Basic MobileNet block combining:
         - depthwise conv + BN + relu
         - 1x1 conv + BN + relu
        """
        with tf.variable_scope(scope, 'block', [net]) as sc:
            num_out_channels = int(num_out_channels * width_multiplier)
            if stride[0] == 1 and stride[1] == 1:
                # Depthwise convolution with stride=1
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            else:
                # Mimic CAFFE padding if stride > 1 => usually better accuracy.
                net = custom_layers.pad2d(net, pad=padding)
                net = custom_layers.depthwise_convolution2d(net,
                                                            kernel_size,
                                                            padding='VALID',
                                                            depth_multiplier=1,
                                                            stride=stride,
                                                            scope='conv_dw')
            # Pointwise convolution.
            net = slim.conv2d(net, num_out_channels, [1, 1], scope='conv_pw')
            return net

    def mobilenet_block_btree_v1(net,
                                 num_out_channels,
                                 stride=[1, 1],
                                 split=2,
                                 scope=None):
        """Basic MobileNet block combining:
         - depthwise conv + BN + relu
         - 1x1 conv + BN + relu
        """
        with tf.variable_scope(scope, 'block', [net]) as sc:
            num_out_channels = int(num_out_channels * width_multiplier)
            # Depthwise convolution with stride=1
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        depth_multiplier=1,
                                                        stride=stride,
                                                        scope='conv_dw')
            # Split-pointwise convolution.
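            # (conv2d_1x1_split presumably applies independent 1x1 convolutions
            # to `split` channel groups; see the sketch after this example.)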
            net = btree_layers.conv2d_1x1_split(net,
                                                num_out_channels,
                                                split=split,
                                                scope='conv_pw_split')
            return net

    def mobilenet_block_btree_v2(net,
                                 num_out_channels,
                                 stride=[1, 1],
                                 split=2,
                                 scope=None):
        """Combination of ResNets block and B-tree.
        """
        with tf.variable_scope(scope, 'block', [net]) as sc:
            # Start with Batch Norm.
            net = custom_layers.batch_norm(net)
            # Depthwise convolution with stride=1
            net = custom_layers.depthwise_convolution2d(net,
                                                        kernel_size,
                                                        depth_multiplier=1,
                                                        stride=stride,
                                                        activation_fn=None,
                                                        scope='conv_dw')
            # Split-pointwise convolution.
            num_out_channels = int(num_out_channels * width_multiplier)
            net = btree_layers.conv2d_1x1_split(net,
                                                num_out_channels,
                                                split=split,
                                                activation_fn=None,
                                                normalizer_fn=None,
                                                scope='conv_pw_split')
            return net

    with tf.variable_scope(scope, 'MobileNets', [inputs], reuse=reuse) as sc:
        end_points = {}
        # First full convolution...
        net = custom_layers.pad2d(inputs, pad=padding)
        net = slim.conv2d(net,
                          32,
                          kernel_size,
                          stride=[2, 2],
                          padding='VALID',
                          scope='conv1')
        # net = slim.conv2d(inputs, 32, kernel_size, stride=[2, 2],
        #                   padding='SAME', scope='conv1')
        # Then, MobileNet blocks!
        net = mobilenet_block(net, 64, scope='block2')
        net = mobilenet_block(net, 128, stride=[2, 2], scope='block3')
        net = mobilenet_block(net, 128, scope='block4')
        net = mobilenet_block(net, 256, stride=[2, 2], scope='block5')
        net = mobilenet_block(net, 256, scope='block6')
        net = mobilenet_block(net, 512, stride=[2, 2], scope='block7')
        # Intermediate blocks...
        for i in range(8, 16):
            # None (rather than the outer `scope`) lets each iteration open
            # its own 'resblock_%i' variable scope.
            with tf.variable_scope(None, 'resblock_%i' % i, [net]):
                # Residual block...
                res = net
                net = mobilenet_block_btree_v2(net,
                                               512,
                                               split=4,
                                               scope='block%i_a' % i)
                net = btree_layers.translate_channels(
                    net, delta=64, scope='ch_translate_%i_a' % i)
                net = mobilenet_block_btree_v2(net,
                                               512,
                                               split=4,
                                               scope='block%i_b' % i)
                net = btree_layers.translate_channels(
                    net, delta=-64, scope='ch_translate_%i_b' % i)
                net = mobilenet_block_btree_v2(net,
                                               512,
                                               split=4,
                                               scope='block%i_c' % i)
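                # The residual add below requires this branch to preserve the
                # channel count coming out of block7, int(512 * width_multiplier).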
                net = tf.add(res, net, 'residual_sum_%i' % i)
        net = custom_layers.batch_norm(net)

        # Final blocks.
        net = mobilenet_block(net, 1024, stride=[2, 2], scope='block13')
        net = mobilenet_block(net, 1024, scope='block14')
        # Spatial pooling + fully connected layer.
        net = custom_layers.spatial_mean(net,
                                         keep_dims=True,
                                         scope='spatial_mean14')
        # 1000 ImageNet logits here; the pad_logits call below pads
        # num_classes - 1000 extra entries to reach num_classes.
        net = slim.conv2d(net,
                          1000, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          normalizer_params=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='conv_fc15')
        net = custom_layers.spatial_squeeze(net)

        # Logits padding: get everyone to the same number of classes.
        if pad_logits:
            net = custom_layers.pad_logits(net, pad=(num_classes - 1000, 0))
        return net, end_points
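
btree_layers.conv2d_1x1_split is project-specific and its implementation is not shown here. Judging from the name and the call sites above, it presumably performs a grouped (block-diagonal) 1x1 convolution over `split` channel groups. A hypothetical sketch of such an operation in plain slim, under that assumption (the real conv2d_1x1_split may differ):

def grouped_conv2d_1x1(net, num_out_channels, split=2,
                       activation_fn=tf.nn.relu, normalizer_fn=None,
                       scope=None):
    # Hypothetical stand-in for btree_layers.conv2d_1x1_split: split the
    # input channels into `split` groups, run an independent 1x1 conv on
    # each group, and concatenate the results along the channel axis.
    with tf.variable_scope(scope, 'conv_pw_split', [net]):
        groups = tf.split(net, split, axis=3)
        outputs = [slim.conv2d(group, num_out_channels // split, [1, 1],
                               activation_fn=activation_fn,
                               normalizer_fn=normalizer_fn,
                               scope='group_%d' % i)
                   for i, group in enumerate(groups)]
        return tf.concat(outputs, axis=3)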