Example #1
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 224x224. To use in fully
          convolutional mode, set spatial_squeeze to false.
          The LRN layers have been removed and the initializers changed from
          random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not the spatial dimensions of the outputs
        should be squeezed. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      net: the last op, containing the logits (pre-softmax class scores).
      end_points: a dict mapping layer scope names to the corresponding
        activations.
    """
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = layers.conv2d(inputs,
                                64, [11, 11],
                                4,
                                padding='VALID',
                                scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 384, [3, 3], scope='conv4')
            net = layers.conv2d(net, 256, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(net,
                                    4096, [5, 5],
                                    padding='VALID',
                                    scope='fc6')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
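
A minimal usage sketch for classification mode (assumes TF 1.x graph mode and
that alexnet_v2 with its imports is already in scope; the names below are
illustrative):

import tensorflow as tf

# 224x224 inputs put the network in classification mode (see the note above).
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = alexnet_v2(images, num_classes=1000, is_training=False)
# With spatial_squeeze=True (the default), logits has shape [batch_size, 1000];
# end_points maps scope names such as 'alexnet_v2/conv1' to their activations.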
Example #2
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
    """Defines the Inception V1 architecture.

    This architecture is defined in:

      Going deeper with convolutions
      Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
      Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
      http://arxiv.org/pdf/1409.4842v1.pdf.

    The default image size used to train this network is 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether the model is being trained or not.
      dropout_keep_prob: the fraction of activation values that are retained.
      prediction_fn: a function to get predictions out of logits.
      spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
          of shape [B, 1, 1, C], where B is batch_size and C is number of
          classes.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse, 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, num_classes]
      end_points: a dictionary from components of the network to the corresponding
        activation.
    """
    # Final pooling and prediction
    with variable_scope.variable_scope(scope,
                                       'InceptionV1', [inputs, num_classes],
                                       reuse=reuse) as scope:
        with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                       is_training=is_training):
            net, end_points = inception_v1_base(inputs, scope=scope)
            with variable_scope.variable_scope('Logits'):
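                # Note: the op below is average pooling despite the 'MaxPool'
                # scope name; the name is kept verbatim from the original code
                # (likely for checkpoint-name compatibility).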
                net = layers_lib.avg_pool2d(net, [7, 7],
                                            stride=1,
                                            scope='MaxPool_0a_7x7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         scope='Dropout_0b')
                logits = layers.conv2d(net,
                                       num_classes, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_0c_1x1')
                if spatial_squeeze:
                    logits = array_ops.squeeze(logits, [1, 2],
                                               name='SpatialSqueeze')

                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits,
                                                          scope='Predictions')
    return logits, end_points
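
A sketch of weight sharing between a training and an evaluation graph via the
reuse flag (assumes inception_v1 and its dependencies are in scope):

import tensorflow as tf

train_images = tf.placeholder(tf.float32, [32, 224, 224, 3])
eval_images = tf.placeholder(tf.float32, [32, 224, 224, 3])

# The first call creates the variables under 'InceptionV1'; the second call
# reuses them, so both graphs share a single set of weights.
train_logits, _ = inception_v1(train_images, is_training=True)
eval_logits, end_points = inception_v1(eval_images, is_training=False,
                                       reuse=True)
probabilities = end_points['Predictions']  # prediction_fn output (softmax)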
Example #3
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
    """Oxford Net VGG 11-Layers version A Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not the spatial dimensions of the outputs
        should be squeezed. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      net: the last op, containing the logits (pre-softmax class scores).
      end_points: a dict mapping layer scope names to the corresponding
        activations.
    """
    with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(
                inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3],
                                    scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = layers.conv2d(
                net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training, scope='dropout6')
            net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
            net = layers_lib.dropout(
                net, dropout_keep_prob, is_training=is_training, scope='dropout7')
            net = layers.conv2d(
                net,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
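
A fully-convolutional sketch (assumes vgg_a is in scope): for inputs larger
than 224x224, fc8 keeps a spatial grid, so the squeeze must be disabled; one
simple way to collapse the grid afterwards is a mean over the spatial axes:

import tensorflow as tf

images = tf.placeholder(tf.float32, [1, 384, 384, 3])
net, end_points = vgg_a(images, num_classes=1000, is_training=False,
                        spatial_squeeze=False)
# net has shape [1, h, w, 1000]: one 1000-way score vector per position.
scores = tf.reduce_mean(net, axis=[1, 2])  # collapse to [1, 1000]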
Example #4
def inception_v2(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV2'):
    """Inception v2 model for classification.

    Constructs an Inception v2 network for classification as described in
    http://arxiv.org/abs/1502.03167.

    The recommended image size used to train this network is 224x224. For image
    sizes that differ substantially, it is recommended to use inception_v2_base()
    and connect custom final layers to the output.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether the model is being trained or not.
      dropout_keep_prob: the fraction of activation values that are retained.
      min_depth: Minimum depth value (number of channels) for all convolution ops.
        Enforced when depth_multiplier < 1, and not an active constraint when
        depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      prediction_fn: a function to get predictions out of logits.
      spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
          of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
          Note that input image sizes other than 224x224 might lead to different
          spatial dimensions, and hence cannot be squeezed. In this event,
          it is best to set spatial_squeeze as False, and perform a reduce_mean
          over the resulting spatial dimensions with sizes exceeding 1.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse, 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, num_classes]
      end_points: a dictionary from components of the network to the corresponding
        activation.

    Raises:
      ValueError: if depth_multiplier <= 0.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    # Final pooling and prediction
    with variable_scope.variable_scope(
            scope, 'InceptionV2', [inputs, num_classes], reuse=reuse) as scope:
        with arg_scope(
                [layers_lib.batch_norm, layers_lib.dropout], is_training=is_training):
            net, end_points = inception_v2_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier)
            with variable_scope.variable_scope('Logits'):
                kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
                net = layers_lib.avg_pool2d(
                    net,
                    kernel_size,
                    padding='VALID',
                    scope='AvgPool_1a_{}x{}'.format(*kernel_size))
                # 1 x 1 x 1024
                net = layers_lib.dropout(
                    net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
                logits = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = array_ops.squeeze(
                        logits, [1, 2], name='SpatialSqueeze')
            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(
                logits, scope='Predictions')
    return logits, end_points
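
The interaction of depth_multiplier and min_depth can be checked in isolation;
the helper below mirrors the depth() function defined in the inception_v3
example further down (inception_v2_base is assumed to apply the same rule):

def depth(d, depth_multiplier=0.25, min_depth=16):
    # Scale the channel count, but never drop below min_depth.
    return max(int(d * depth_multiplier), min_depth)

print(depth(64))   # 16: 64 * 0.25 lands exactly on the floor
print(depth(32))   # 16: 32 * 0.25 = 8 is clipped up to min_depth
print(depth(256))  # 64: min_depth is not an active constraint here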
Example #5
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 create_aux_logits=True,
                 scope='InceptionV3'):
    """Inception model from http://arxiv.org/abs/1512.00567.

    "Rethinking the Inception Architecture for Computer Vision"

    Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
    Zbigniew Wojna.

    With the default arguments this method constructs the exact model defined in
    the paper. However, one can experiment with variations of the inception_v3
    network by changing arguments dropout_keep_prob, min_depth and
    depth_multiplier.

    The default image size used to train this network is 299x299.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether the model is being trained or not.
      dropout_keep_prob: the fraction of activation values that are retained.
      min_depth: Minimum depth value (number of channels) for all convolution ops.
        Enforced when depth_multiplier < 1, and not an active constraint when
        depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      prediction_fn: a function to get predictions out of logits.
      spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
        To use this parameter, the input images must be smaller
        than 300x300 pixels, in which case the output logit layer
        does not contain spatial information and can be removed.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse, 'scope' must be given.
      create_aux_logits: whether to create auxiliary logits.
      scope: Optional variable_scope.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, num_classes]
      end_points: a dictionary from components of the network to the corresponding
        activation.

    Raises:
      ValueError: if 'depth_multiplier' is less than or equal to zero.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    def depth(d):
        return max(int(d * depth_multiplier), min_depth)

    with variable_scope.variable_scope(scope,
                                       'InceptionV3', [inputs, num_classes],
                                       reuse=reuse) as scope:
        with arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                       is_training=is_training):
            net, end_points = inception_v3_base(
                inputs,
                scope=scope,
                min_depth=min_depth,
                depth_multiplier=depth_multiplier)

            # Auxiliary Head logits
            if create_aux_logits and num_classes:
                with arg_scope([
                        layers.conv2d, layers_lib.max_pool2d,
                        layers_lib.avg_pool2d
                ],
                               stride=1,
                               padding='SAME'):
                    aux_logits = end_points['Mixed_6e']
                    with variable_scope.variable_scope('AuxLogits'):
                        aux_logits = layers_lib.avg_pool2d(
                            aux_logits, [5, 5],
                            stride=3,
                            padding='VALID',
                            scope='AvgPool_1a_5x5')
                        aux_logits = layers.conv2d(aux_logits,
                                                   depth(128), [1, 1],
                                                   scope='Conv2d_1b_1x1')

                        # Shape of feature map before the final layer.
                        kernel_size = _reduced_kernel_size_for_small_input(
                            aux_logits, [5, 5])
                        aux_logits = layers.conv2d(
                            aux_logits,
                            depth(768),
                            kernel_size,
                            weights_initializer=trunc_normal(0.01),
                            padding='VALID',
                            scope='Conv2d_2a_{}x{}'.format(*kernel_size))
                        aux_logits = layers.conv2d(
                            aux_logits,
                            num_classes, [1, 1],
                            activation_fn=None,
                            normalizer_fn=None,
                            weights_initializer=trunc_normal(0.001),
                            scope='Conv2d_2b_1x1')
                        if spatial_squeeze:
                            aux_logits = array_ops.squeeze(
                                aux_logits, [1, 2], name='SpatialSqueeze')
                        end_points['AuxLogits'] = aux_logits

            # Final pooling and prediction
            with variable_scope.variable_scope('Logits'):
                kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
                net = layers_lib.avg_pool2d(
                    net,
                    kernel_size,
                    padding='VALID',
                    scope='AvgPool_1a_{}x{}'.format(*kernel_size))
                # 1 x 1 x 2048
                net = layers_lib.dropout(net,
                                         keep_prob=dropout_keep_prob,
                                         scope='Dropout_1b')
                end_points['PreLogits'] = net
                # 2048
                logits = layers.conv2d(net,
                                       num_classes, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_1c_1x1')
                if spatial_squeeze:
                    logits = array_ops.squeeze(logits, [1, 2],
                                               name='SpatialSqueeze')
                # 1000
            end_points['Logits'] = logits
            end_points['Predictions'] = prediction_fn(logits,
                                                      scope='Predictions')
    return logits, end_points
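
A training-loss sketch that uses the auxiliary head (assumes inception_v3 is
in scope; the 0.4 auxiliary weight is a commonly used value, not one this
code prescribes):

import tensorflow as tf

images = tf.placeholder(tf.float32, [16, 299, 299, 3])
labels = tf.placeholder(tf.int32, [16])

logits, end_points = inception_v3(images, num_classes=1000, is_training=True)
main_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
# 'AuxLogits' is present because create_aux_logits=True and num_classes > 0.
aux_loss = tf.losses.sparse_softmax_cross_entropy(labels,
                                                  end_points['AuxLogits'])
total_loss = main_loss + 0.4 * aux_loss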
Example #6
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
    """Contains the model definition for the OverFeat network.

    The definition for the network was obtained from:
      OverFeat: Integrated Recognition, Localization and Detection using
      Convolutional Networks
      Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
      Yann LeCun, 2014
      http://arxiv.org/abs/1312.6229

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 231x231. To use in fully
          convolutional mode, set spatial_squeeze to false.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not the spatial dimensions of the outputs
        should be squeezed. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      net: the last op, containing the logits (pre-softmax class scores).
      end_points: a dict mapping layer scope names to the corresponding
        activations.

    """
    with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers.conv2d(inputs,
                                64, [11, 11],
                                4,
                                padding='VALID',
                                scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers.conv2d(net,
                                256, [5, 5],
                                padding='VALID',
                                scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers.conv2d(net, 512, [3, 3], scope='conv3')
            net = layers.conv2d(net, 1024, [3, 3], scope='conv4')
            net = layers.conv2d(net, 1024, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                # Use conv2d instead of fully_connected layers.
                net = layers.conv2d(net,
                                    3072, [6, 6],
                                    padding='VALID',
                                    scope='fc6')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
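
A dense-evaluation sketch (assumes overfeat is in scope): OverFeat is designed
to run convolutionally over larger images, yielding a grid of class
predictions, one per window position:

import tensorflow as tf

images = tf.placeholder(tf.float32, [1, 451, 451, 3])
net, end_points = overfeat(images, num_classes=1000, is_training=False,
                           spatial_squeeze=False)
# net has shape [1, h, w, 1000]: a 1000-way logit vector at each position.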