Example 1
def create_test_network_2():
    """Aligned network for test.

  The graph is a variation of the network from "create_test_network_1":
  layers 2 and 3 are changed to max-pooling operations. Since max-pooling
  behaves like convolution for receptive-field purposes, the network is still
  aligned and its receptive field size matches that of the network created by
  create_test_network_1().

  Returns:
    g: Tensorflow graph object (Graph proto).
  """
    g = tf.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = tf.placeholder(tf.float32, (None, None, None, 1),
                           name='input_image')
        # Left branch.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch.
        l2_pad = tf.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
        l2 = slim.max_pool2d(l2_pad, [3, 3],
                             stride=2,
                             scope='L2',
                             padding='VALID')
        l3 = slim.max_pool2d(l2, [1, 1], stride=2, scope='L3', padding='VALID')
        # Addition.
        tf.nn.relu(l1 + l3, name='output')
    return g
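The two branches are built so that their outputs have identical spatial shapes and can be added. A minimal usage sketch (assuming TF1-style graph execution; the 32x32 input size is chosen only for illustration):

import numpy as np
import tensorflow as tf

g = create_test_network_2()
with g.as_default():
    init_op = tf.global_variables_initializer()
with tf.Session(graph=g) as sess:
    sess.run(init_op)
    # Both branches reduce a 32x32 input to 8x8, so the addition is valid.
    image = np.zeros((1, 32, 32, 1), dtype=np.float32)
    out = sess.run('output:0', feed_dict={'input_image:0': image})
    print(out.shape)  # (1, 8, 8, 1)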
Example 2
def lenet(inputs, scope='lenet', is_training=True, reuse=False):
    layers = OrderedDict()
    net = inputs
    with tf.variable_scope(scope, reuse=reuse):
        with ExitStack() as stack:
            stack.enter_context(
                slim.arg_scope(
                    [slim.fully_connected, slim.conv2d],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(2.5e-5)))
            stack.enter_context(slim.arg_scope([slim.conv2d], padding='VALID'))
            net = slim.conv2d(net, 20, 5, scope='conv1')
            layers['conv1'] = net
            net = slim.max_pool2d(net, 2, stride=2, scope='pool1')
            layers['pool1'] = net
            net = slim.conv2d(net, 50, 5, scope='conv2')
            layers['conv2'] = net
            net = slim.max_pool2d(net, 2, stride=2, scope='pool2')
            layers['pool2'] = net
            net = tf.layers.flatten(net)
            net = slim.fully_connected(net, 500, scope='fc3')
            layers['fc3'] = net
            net = slim.fully_connected(net,
                                       10,
                                       activation_fn=None,
                                       scope='fc4')
            layers['fc4'] = net
    return net, layers
def _spp_block(inputs, data_format='NCHW'):
    return tf.concat([
        slim.max_pool2d(inputs, 13, 1, 'SAME'),
        slim.max_pool2d(inputs, 9, 1, 'SAME'),
        slim.max_pool2d(inputs, 5, 1, 'SAME'), inputs
    ],
                     axis=1 if data_format == 'NCHW' else 3)
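A hedged shape note on the SPP block above: all three pools use stride 1 with 'SAME' padding, so the spatial size is preserved and the concatenation quadruples the channel count. A small sketch calling the function defined above (NHWC layout; the input size is an assumption for illustration):

import tensorflow as tf

features = tf.placeholder(tf.float32, (None, 19, 19, 512))
spp = _spp_block(features, data_format='NHWC')
print(spp.shape)  # (?, 19, 19, 2048): 512 channels from each pool plus the input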
Example 4
def ChainedResidualPooling(inputs, n_filters=256):
    """
    Chained residual pooling aims to capture background 
    context from a large image region. This component is 
    built as a chain of 2 pooling blocks, each consisting 
    of one max-pooling layer and one convolution layer. One pooling
    block takes the output of the previous pooling block as
    input. The output feature maps of all pooling blocks are 
    fused together with the input feature map through summation 
    of residual connections.

    Arguments:
      inputs: The input tensor
      n_filters: Number of output feature maps for each conv

    Returns:
      Double-pooled feature maps
    """

    net_relu = tf.nn.relu(inputs)
    net = slim.max_pool2d(net_relu, [5, 5], stride=1, padding='SAME')
    net = slim.conv2d(net, n_filters, 3, activation_fn=None)
    net_sum_1 = tf.add(net, net_relu)

    net = slim.max_pool2d(net, [5, 5], stride=1, padding='SAME')
    net = slim.conv2d(net, n_filters, 3, activation_fn=None)
    net_sum_2 = tf.add(net, net_sum_1)

    return net_sum_2
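Because both pooling layers use stride 1 with 'SAME' padding and slim.conv2d defaults to 'SAME' padding, the block preserves the spatial size; the residual additions also require the input to already have n_filters channels. A hedged usage sketch (input shape is illustrative):

import tensorflow as tf

feature_map = tf.placeholder(tf.float32, (None, 64, 64, 256))
refined = ChainedResidualPooling(feature_map, n_filters=256)
print(refined.shape)  # (?, 64, 64, 256)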
Example 5
def conv_net(inputs):
    '''
    Build a CNN.

    Parameters
    ----------
    inputs : input data

    Returns
    -------
    net : a CNN architecture
    '''

    # using the scope to avoid mentioning the parameters repeatedly
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
        activation_fn = leaky_relu(0.005),
        weights_initializer = tf.truncated_normal_initializer(0.0, 0.01),
        weights_regularizer = slim.l2_regularizer(0.0005)):

        net = slim.conv2d(inputs, 512, (3, inputs.shape[2]), 1, padding = 'valid', scope = 'conv_1') # (3, dimension_count)
        net = slim.max_pool2d(net, (4, 1), 4, padding = 'valid', scope = 'pool_2')
        net = slim.conv2d(net, 512, (5, 1), 1, scope = 'conv_3')
        net = slim.max_pool2d(net, (4, 1), 4, padding = 'valid', scope = 'pool_4')
        net = slim.flatten(net, scope = 'flatten_5')
        net = slim.fully_connected(net, 2, scope = 'fc_6', activation_fn = tf.nn.softmax)

    return net
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16',
           update_top_only=False,
           fc_conv_padding='VALID',
           reuse=None):
    with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs]) as sc, \
             slim.arg_scope(vgg_arg_scope(reuse = reuse)):
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            # if update_top_only:
            #   net = tf.stop_gradient(net)
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net,
                              4096, [7, 7],
                              padding=fc_conv_padding,
                              scope='fc6')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout7')
            if num_classes is not None:
                net = slim.conv2d(net,
                                  num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
def define_vggish_slim(training=False):
    """Defines the VGGish TensorFlow model.
  All ops are created in the current default graph, under the scope 'vggish/'.
  The input is a placeholder named 'vggish/input_features' of type float32 and
  shape [batch_size, num_frames, num_bands] where batch_size is variable and
  num_frames and num_bands are constants, and [num_frames, num_bands] represents
  a log-mel-scale spectrogram patch covering num_bands frequency bands and
  num_frames time frames (where each frame step is usually 10ms). This is
  produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET).
  The output is an op named 'vggish/embedding' which produces the activations of
  a 128-D embedding layer, which is usually the penultimate layer when used as
  part of a full model with a final classifier layer.
  Args:
    training: If true, all parameters are marked trainable.
  Returns:
    The op 'vggish/embedding'.
  """
    # Defaults:
    # - All weights are initialized to N(0, INIT_STDDEV).
    # - All biases are initialized to 0.
    # - All activations are ReLU.
    # - All convolutions are 3x3 with stride 1 and SAME padding.
    # - All max-pools are 2x2 with stride 2 and SAME padding.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.truncated_normal_initializer(
                            stddev=params.INIT_STDDEV),
                        biases_initializer=tf.zeros_initializer(),
                        activation_fn=tf.nn.relu,
                        trainable=training), \
         slim.arg_scope([slim.conv2d],
                        kernel_size=[3, 3], stride=1, padding='SAME'), \
         slim.arg_scope([slim.max_pool2d],
                        kernel_size=[2, 2], stride=2, padding='SAME'), \
         tf.variable_scope('vggish'):
        # Input: a batch of 2-D log-mel-spectrogram patches.
        features = tf.placeholder(tf.float32,
                                  shape=(None, params.NUM_FRAMES,
                                         params.NUM_BANDS),
                                  name='input_features')
        # Reshape to 4-D so that we can convolve a batch with conv2d().
        net = tf.reshape(features,
                         [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

        # The VGG stack of alternating convolutions and max-pools.
        net = slim.conv2d(net, 64, scope='conv1')
        net = slim.max_pool2d(net, scope='pool1')
        net = slim.conv2d(net, 128, scope='conv2')
        net = slim.max_pool2d(net, scope='pool2')
        net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
        net = slim.max_pool2d(net, scope='pool3')
        net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
        net = slim.max_pool2d(net, scope='pool4')

        # Flatten before entering fully-connected layers
        net = slim.flatten(net)
        net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
        # The embedding layer.
        net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
        return tf.identity(net, name='embedding')
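A minimal, hedged usage sketch for the model above. It assumes the `params` module from the VGGish codebase is importable (e.g. params.NUM_FRAMES = 96, params.NUM_BANDS = 64, params.EMBEDDING_SIZE = 128):

import numpy as np
import tensorflow as tf

with tf.Graph().as_default(), tf.Session() as sess:
    embeddings = define_vggish_slim(training=False)
    sess.run(tf.global_variables_initializer())
    # A batch of two random log-mel spectrogram patches, just to exercise the graph.
    patches = np.random.rand(2, params.NUM_FRAMES,
                             params.NUM_BANDS).astype(np.float32)
    emb = sess.run(embeddings,
                   feed_dict={'vggish/input_features:0': patches})
    print(emb.shape)  # (2, params.EMBEDDING_SIZE)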
Example 8
def vgg_16_base(inputs,
                is_training=True,
                scope='vgg_16',
                fc_conv_padding='VALID',
                final_endpoint=None):
    """
    VGG16模型
    :param inputs:a tensor [batch_size, height, width, channels]
    :param num_classes:分类数
    :param is_training: 是否训练
    :param dropout_keep_prob: 训练时dropout保持激活的可能性
    :param spatial_squeeze:是否压缩输出的空间维度
    :param scope:变量的可选范围
    :param fc_conv_padding: 全连接层的填充类型 'SAME' or 'VALID'
    :param global_pool: a boolean flag .True: 则对分类模块的输入需用平均池化
    :return: net: VGG net
             end_points :a dict of tensors with intermediate activations.
    """
    end_points = {}
    with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')

            end_point = 'pool4'
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')

            end_point = 'conv5_2'
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            net = slim.repeat(net,
                              1,
                              slim.conv2d,
                              512, [3, 3],
                              scope='conv5_3')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # # Convert end_points_collection into a end_point dict.
            # end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        return net, end_points
Example 9
def inference(images,
              keep_probability,
              phase_train=True,
              bottleneck_layer_size=128,
              weight_decay=0.0,
              reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.compat.v1.keras.initializers.
            VarianceScaling(
                scale=1.0,
                mode="fan_avg",
                distribution=("uniform" if True else "truncated_normal")),
            weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with tf.compat.v1.variable_scope('squeezenet', [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1')
                net = fire_module(net, 16, 64, scope='fire2')
                net = fire_module(net, 16, 64, scope='fire3')
                net = fire_module(net, 32, 128, scope='fire4')
                net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4')
                net = fire_module(net, 32, 128, scope='fire5')
                net = fire_module(net, 48, 192, scope='fire6')
                net = fire_module(net, 48, 192, scope='fire7')
                net = fire_module(net, 64, 256, scope='fire8')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8')
                net = fire_module(net, 64, 256, scope='fire9')
                net = slim.dropout(net, keep_probability)
                net = slim.conv2d(net,
                                  1000, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='conv10')
                net = slim.avg_pool2d(net,
                                      net.get_shape()[1:3],
                                      scope='avgpool10')
                net = tf.squeeze(net, [1, 2], name='logits')
                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)
    return net, None
Example 10
    def build_head(self, is_training):

        # Main network
        # Layer  1
        net = slim.repeat(self._image,
                          2,
                          slim.conv2d,
                          64, [3, 3],
                          trainable=False,
                          scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')

        # Layer 2
        net = slim.repeat(net,
                          2,
                          slim.conv2d,
                          128, [3, 3],
                          trainable=False,
                          scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')

        # Layer 3
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          256, [3, 3],
                          trainable=is_training,
                          scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')

        # Layer 4
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          512, [3, 3],
                          trainable=is_training,
                          scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')

        # Layer 5
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          512, [3, 3],
                          trainable=is_training,
                          scope='conv5')

        # Append network to summaries
        self._act_summaries.append(net)

        # Append network as head layer
        self._layers['head'] = net

        return net
Example 11
def vgg_19(inputs,
           num_classes=1000,
           is_training=False,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           reuse = False,
           fc_conv_padding='VALID'):
  """Oxford Net VGG 19-Layers version E Example.
  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.
  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output. Otherwise,
      the output prediction map will be (input / 32) - 6 in case of 'VALID' padding.
  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2',reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4',reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5',reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Convert end_points_collection into an end_points dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)

      return net, end_points
Example 12
def reduction_b(net):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv_1 = slim.conv2d(tower_conv,
                                   384,
                                   3,
                                   stride=2,
                                   padding='VALID',
                                   scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1,
                                    256,
                                    3,
                                    stride=2,
                                    padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
        tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3, scope='Conv2d_0b_3x3')
        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                    256,
                                    3,
                                    stride=2,
                                    padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_3'):
        tower_pool = slim.max_pool2d(net,
                                     3,
                                     stride=2,
                                     padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool],
                    3)
    return net
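A hedged shape check for the reduction block above (the 17x17x896 input size is an assumption for illustration, not taken from this page): every branch reduces the spatial size by 2 via stride-2 'VALID' ops, so the four results can be concatenated along the channel axis.

import tensorflow as tf

feature = tf.placeholder(tf.float32, (None, 17, 17, 896))
out = reduction_b(feature)
print(out.shape)  # (?, 8, 8, 1792): 384 + 256 + 256 + 896 channels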
Example 13
def create_test_network():
    """Convolutional neural network for test.

  Returns:
    g: Tensorflow graph object (Graph proto).
  """
    g = tf.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = tf.placeholder(tf.float32, (None, None, None, 1),
                           name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = tf.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad,
                         1, [3, 3],
                         stride=2,
                         scope='L2',
                         padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = tf.nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        tf.add(l5, l6, name='L7_add')

    return g
Example 14
def block_reduction_a(inputs, scope=None, reuse=None):
    """Builds Reduction-A block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1,
                        padding='SAME'):
        with tf.variable_scope(scope, 'BlockReductionA', [inputs],
                               reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs,
                                       384, [3, 3],
                                       stride=2,
                                       padding='VALID',
                                       scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs,
                                       192, [1, 1],
                                       scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1,
                                       224, [3, 3],
                                       scope='Conv2d_0b_3x3')
                branch_1 = slim.conv2d(branch_1,
                                       256, [3, 3],
                                       stride=2,
                                       padding='VALID',
                                       scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.max_pool2d(inputs, [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='MaxPool_1a_3x3')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
Example 15
def reduction_a(net, k, l, m, n):
    with tf.variable_scope('Branch_0'):
        tower_conv = slim.conv2d(net,
                                 n,
                                 3,
                                 stride=2,
                                 padding='VALID',
                                 scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_1'):
        tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
        tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3, scope='Conv2d_0b_3x3')
        tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                    m,
                                    3,
                                    stride=2,
                                    padding='VALID',
                                    scope='Conv2d_1a_3x3')
    with tf.variable_scope('Branch_2'):
        tower_pool = slim.max_pool2d(net,
                                     3,
                                     stride=2,
                                     padding='VALID',
                                     scope='MaxPool_1a_3x3')
    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
    return net
Example 16
def create_net(
    SPEC_HEIGHT,
    HWW_X,
    LEARN_LOG,
    NUM_FILTERS,
    WIGGLE_ROOM,
    CONV_FILTER_WIDTH,
    NUM_DENSE_UNITS,
    DO_BATCH_NORM,
):

    channels = 4
    net = collections.OrderedDict()

    net["input"] = tf.compat.v1.placeholder(
        tf.float32, (None, SPEC_HEIGHT, HWW_X * 2, channels), name="input"
    )
    net["conv1_1"] = slim.conv2d(
        net["input"],
        NUM_FILTERS,
        (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
    )
    net["conv1_1"] = tf.nn.leaky_relu(net["conv1_1"], alpha=1 / 3)

    net["conv1_2"] = slim.conv2d(
        net["conv1_1"],
        NUM_FILTERS,
        (1, 3),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
    )
    net["conv1_2"] = tf.nn.leaky_relu(net["conv1_2"], alpha=1 / 3)

    W = net["conv1_2"].shape[2]
    net["pool2"] = slim.max_pool2d(net["conv1_2"], kernel_size=(1, W), stride=(1, 1))

    net["pool2"] = tf.transpose(net["pool2"], (0, 3, 2, 1))
    net["pool2_flat"] = slim.flatten(net["pool2"])

    net["fc6"] = slim.fully_connected(
        net["pool2_flat"], NUM_DENSE_UNITS, activation_fn=None, biases_initializer=None
    )
    net["fc6"] = tf.nn.leaky_relu(net["fc6"], alpha=1 / 3)

    net["fc7"] = slim.fully_connected(
        net["fc6"], NUM_DENSE_UNITS, activation_fn=None, biases_initializer=None
    )
    net["fc7"] = tf.nn.leaky_relu(net["fc7"], alpha=1 / 3)

    net["fc8"] = slim.fully_connected(net["fc7"], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net["output"] = tf.nn.softmax(net["fc8"])

    return net
  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the Inception ResNet v2
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
      with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
          weight_decay=self._weight_decay)):
        # Forces is_training to False to disable batch norm update.
        with slim.arg_scope([slim.batch_norm],
                            is_training=self._train_batch_norm):
          with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                              stride=1, padding='SAME'):
            with tf.variable_scope('Mixed_7a'):
              with tf.variable_scope('Branch_0'):
                tower_conv = slim.conv2d(proposal_feature_maps,
                                         256, 1, scope='Conv2d_0a_1x1')
                tower_conv_1 = slim.conv2d(
                    tower_conv, 384, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_1'):
                tower_conv1 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv1_1 = slim.conv2d(
                    tower_conv1, 288, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_2'):
                tower_conv2 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                            scope='Conv2d_0b_3x3')
                tower_conv2_2 = slim.conv2d(
                    tower_conv2_1, 320, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_3'):
                tower_pool = slim.max_pool2d(
                    proposal_feature_maps, 3, stride=2, padding='VALID',
                    scope='MaxPool_1a_3x3')
              net = tf.concat(
                  [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
            net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20)
            net = inception_resnet_v2.block8(net, activation_fn=None)
            proposal_classifier_features = slim.conv2d(
                net, 1536, 1, scope='Conv2d_7b_1x1')
        return proposal_classifier_features
Example 18
    def res_yolo(self, inputs, filters, res_num):
        inputs = slim.conv2d(inputs, filters, [3, 3])
        inputs = slim.max_pool2d(inputs, [2, 2])
        for i in range(res_num):
            shortcut = inputs
            # 1x1 bottleneck halves the channels (integer division keeps the
            # filter count an int), the 3x3 conv restores them, then the
            # residual shortcut is added.
            inputs = slim.conv2d(inputs, filters // 2, [1, 1])
            inputs = slim.conv2d(inputs, filters, [3, 3])
            inputs = inputs + shortcut
        return inputs
Example 19
def vgg_16(inputs, scope='vgg_16'):
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d]):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')

    return net
Example 20
  def build(self, input_tensors, is_training, lengths=None):
    """Applies 2d max pooling on the input tensor."""
    input_tensor = input_tensors[-1]
    if input_tensor.get_shape().as_list()[2] < self._pool_size:
      return input_tensors

    max_pool = tf_slim.max_pool2d(
        input_tensor,
        self._pool_size,
        stride=self._strides,
        padding='same',
    )
    return input_tensors + [max_pool]
def subsample(inputs, factor, scope=None):
    """
    Subsample the input along the spatial dimensions.
    :param inputs: A `Tensor` of size [batch, height_in, width_in, channels].
    :param factor: The subsampling factor.
    :param scope: Optional variable_scope.
    :return: output: A `Tensor` of size [batch, height_out, width_out, channels] with the
      input, either intact (if factor == 1) or subsampled (if factor > 1).
    """
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
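The [1, 1] max pool with stride=factor simply picks every factor-th row and column, so it subsamples without mixing values. A small usage sketch (shapes are illustrative):

import tensorflow as tf

feature_map = tf.placeholder(tf.float32, (None, 56, 56, 256))
half = subsample(feature_map, factor=2)   # (?, 28, 28, 256)
same = subsample(feature_map, factor=1)   # returned unchanged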
Example 22
def _apply_size_dependent_ordering(input_feature, feature_level, block_level,
                                   expansion_size, use_explicit_padding,
                                   use_native_resize_op):
  """Applies Size-Dependent-Ordering when resizing feature maps.

     See https://arxiv.org/abs/1912.01106

  Args:
    input_feature: input feature map to be resized.
    feature_level: the level of the input feature.
    block_level: the desired output level for the block.
    expansion_size: the expansion size for the block.
    use_explicit_padding: Whether to use explicit padding.
    use_native_resize_op: Whether to use native resize op.

  Returns:
    A transformed feature at the desired resolution and expansion size.
  """
  padding = 'VALID' if use_explicit_padding else 'SAME'
  if feature_level >= block_level:  # Perform 1x1 then upsampling.
    node = slim.conv2d(
        input_feature,
        expansion_size, [1, 1],
        activation_fn=None,
        normalizer_fn=slim.batch_norm,
        padding=padding,
        scope='Conv1x1')
    if feature_level == block_level:
      return node
    scale = 2**(feature_level - block_level)
    if use_native_resize_op:
      input_shape = shape_utils.combined_static_and_dynamic_shape(node)
      node = tf.image.resize_nearest_neighbor(
          node, [input_shape[1] * scale, input_shape[2] * scale])
    else:
      node = ops.nearest_neighbor_upsampling(node, scale=scale)
  else:  # Perform downsampling then 1x1.
    stride = 2**(block_level - feature_level)
    node = slim.max_pool2d(
        _maybe_pad(input_feature, use_explicit_padding), [3, 3],
        stride=[stride, stride],
        padding=padding,
        scope='Downsample')
    node = slim.conv2d(
        node,
        expansion_size, [1, 1],
        activation_fn=None,
        normalizer_fn=slim.batch_norm,
        padding=padding,
        scope='Conv1x1')
  return node
Example 23
def create_net(SPEC_HEIGHT, HWW_X, LEARN_LOG, NUM_FILTERS, WIGGLE_ROOM,
               CONV_FILTER_WIDTH, NUM_DENSE_UNITS, DO_BATCH_NORM):

    tf.compat.v1.disable_eager_execution()

    channels = 4
    net = collections.OrderedDict()

    net['input'] = tf.placeholder(tf.float32,
                                  (None, SPEC_HEIGHT, HWW_X * 2, channels),
                                  name='input')
    net['conv1_1'] = slim.conv2d(
        net['input'],
        NUM_FILTERS, (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH),
        padding='valid',
        activation_fn=None,
        biases_initializer=None)
    net['conv1_1'] = tf.nn.leaky_relu(net['conv1_1'], alpha=1 / 3)

    net['conv1_2'] = slim.conv2d(net['conv1_1'],
                                 NUM_FILTERS, (1, 3),
                                 padding='valid',
                                 activation_fn=None,
                                 biases_initializer=None)
    net['conv1_2'] = tf.nn.leaky_relu(net['conv1_2'], alpha=1 / 3)

    W = net['conv1_2'].shape[2]
    net['pool2'] = slim.max_pool2d(net['conv1_2'],
                                   kernel_size=(1, W),
                                   stride=(1, 1))

    net['pool2'] = tf.transpose(net['pool2'], (0, 3, 2, 1))
    net['pool2_flat'] = slim.flatten(net['pool2'])

    net['fc6'] = slim.fully_connected(net['pool2_flat'],
                                      NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc6'] = tf.nn.leaky_relu(net['fc6'], alpha=1 / 3)

    net['fc7'] = slim.fully_connected(net['fc6'],
                                      NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc7'] = tf.nn.leaky_relu(net['fc7'], alpha=1 / 3)

    net['fc8'] = slim.fully_connected(net['fc7'], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net['output'] = tf.nn.softmax(net['fc8'])

    return net
Example 24
    def create_network(self, input, trainable):
        if trainable:
            wr = slim.l2_regularizer(self.regularization)
        else:
            wr = None

        # the input is stack of black and white frames.
        # put the stack in the place of channel (last in tf)
        input_t = tf.transpose(input, [0, 2, 3, 1])

        net = slim.conv2d(input_t,
                          8, (7, 7),
                          data_format="NHWC",
                          activation_fn=tf.nn.relu,
                          stride=3,
                          weights_regularizer=wr,
                          trainable=trainable)
        net = slim.max_pool2d(net, 2, 2)
        net = slim.conv2d(net,
                          16, (3, 3),
                          data_format="NHWC",
                          activation_fn=tf.nn.relu,
                          weights_regularizer=wr,
                          trainable=trainable)
        net = slim.max_pool2d(net, 2, 2)
        net = slim.flatten(net)
        net = slim.fully_connected(net,
                                   256,
                                   activation_fn=tf.nn.relu,
                                   weights_regularizer=wr,
                                   trainable=trainable)
        q_state_action_values = slim.fully_connected(net,
                                                     self.dim_actions,
                                                     activation_fn=None,
                                                     weights_regularizer=wr,
                                                     trainable=trainable)

        return q_state_action_values
def create_test_network(placeholder_resolution,
                        convert_variables_to_constants):
    """Convolutional neural network for test.

  Args:
    placeholder_resolution: Resolution to use for input placeholder. Used for
      height and width dimensions.
    convert_variables_to_constants: Whether to convert variables to constants.

  Returns:
    name_to_node: Dict keyed by node name, each entry containing the node's
      NodeDef.
  """
    g = tf.Graph()
    sess = tf.Session(graph=g)
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = tf.placeholder(
            tf.float32, (1, placeholder_resolution, placeholder_resolution, 1),
            name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = tf.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad,
                         1, [3, 3],
                         stride=2,
                         scope='L2',
                         padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = tf.nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        tf.add(l5, l6, name='L7_add')

        if convert_variables_to_constants:
            sess.run(tf.global_variables_initializer())
            graph_def = tf.graph_util.convert_variables_to_constants(
                sess, g.as_graph_def(), ['L7_add'])
        else:
            graph_def = g.as_graph_def()

    name_to_node = graph_compute_order.parse_graph_nodes(graph_def)
    return name_to_node
Example 26
def _pooling(net, stride, operation):
    """Parses operation and performs the correct pooling operation on net."""
    padding = 'SAME'
    pooling_type, pooling_shape = _operation_to_pooling_info(operation)
    if pooling_type == 'avg':
        net = slim.avg_pool2d(net,
                              pooling_shape,
                              stride=stride,
                              padding=padding)
    elif pooling_type == 'max':
        net = slim.max_pool2d(net,
                              pooling_shape,
                              stride=stride,
                              padding=padding)
    else:
        raise NotImplementedError('Unimplemented pooling type: ', pooling_type)
    return net
Example 27
    def _downsample(self, net, num_filters, downsample_ratio, scope):
        """Perform maxpool downsampling then 1x1 conv."""
        add_fixed_padding = self._use_explicit_padding and downsample_ratio > 1
        padding = 'VALID' if add_fixed_padding else 'SAME'
        padded_net = (ops.fixed_padding(net, downsample_ratio + 1)
                      if add_fixed_padding else net)
        node_down = slim.max_pool2d(
            padded_net,
            [downsample_ratio + 1, downsample_ratio + 1],
            stride=[downsample_ratio, downsample_ratio],
            padding=padding,
            scope=scope + '/maxpool_downsampling')
        node_after_down = slim.conv2d(node_down,
                                      num_filters, [1, 1],
                                      activation_fn=tf.identity,
                                      normalizer_fn=self._normalization_fn,
                                      padding=padding,
                                      scope=scope + '/1x1_after_downsampling')
        return node_after_down
Example 28
def conv_net(inputs, hparams):
  """Builds the ConvNet from Kelz 2016."""
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=slim.variance_scaling_initializer(
          factor=2.0, mode='FAN_AVG', uniform=True)):

    net = inputs
    i = 0
    for (conv_temporal_size, conv_freq_size,
         num_filters, freq_pool_size, dropout_amt) in zip(
             hparams.temporal_sizes, hparams.freq_sizes, hparams.num_filters,
             hparams.pool_sizes, hparams.dropout_keep_amts):
      net = slim.conv2d(
          net,
          num_filters, [conv_temporal_size, conv_freq_size],
          scope='conv' + str(i),
          normalizer_fn=slim.batch_norm)
      if freq_pool_size > 1:
        net = slim.max_pool2d(
            net, [1, freq_pool_size],
            stride=[1, freq_pool_size],
            scope='pool' + str(i))
      if dropout_amt < 1:
        net = slim.dropout(net, dropout_amt, scope='dropout' + str(i))
      i += 1

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(
        net, (dims[0], dims[1], net.shape[2] * net.shape[3]),
        'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')

    return net
Example 29
    def _crop_pool_layer(self, bottom, rois, name):
        with tf.variable_scope(name):
            batch_ids = tf.squeeze(
                tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
            # Get the normalized coordinates of bboxes
            bottom_shape = tf.shape(bottom)
            height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(
                self._feat_stride[0])
            width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(
                self._feat_stride[0])
            x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
            y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
            x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
            y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
            # Won't be backpropagated to rois anyway, but to save time
            bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
            pre_pool_size = cfg.FLAGS.roi_pooling_size * 2
            crops = tf.image.crop_and_resize(bottom,
                                             bboxes,
                                             tf.to_int32(batch_ids),
                                             [pre_pool_size, pre_pool_size],
                                             name="crops")

        return slim.max_pool2d(crops, [2, 2], padding='SAME')
Example 30
def attention_inception_v3_base(inputs,
                                final_endpoint='Mixed_7c',
                                min_depth=16,
                                depth_multiplier=1.0,
                                scope=None,
                                attention_module='',
                                attention_position='all'):
    """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.
    attention_module: Optional attention_module. Accepted values are '' or
      'se_block'.
    attention_position: Optional attention_position. Default is 'all'. Accepted
      values are 'head', 'extractor', and 'all'.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
    # end_points will collect relevant activations for external use, for example
    # summaries or losses.
    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    def add_attention_layer(attention_module, attention_position, net,
                            end_point):
        if attention_module:
            if attention_position == 'extractor' or attention_position == 'all':
                end_point, net = att.attach_attention_module(
                    net, attention_module, end_point)
        return net, end_point

    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')
    depth = lambda d: max(int(d * depth_multiplier), min_depth)

    with tf.variable_scope(scope, 'InceptionV3', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='VALID'):
            # 299 x 299 x 3
            end_point = 'Input'
            if attention_module:
                if attention_position == 'head' or attention_position == 'all':
                    end_point, net = att.attach_attention_module(
                        inputs, attention_module, end_point)
                    if add_and_check_final(end_point, net):
                        return net, end_points

                else:
                    net = inputs
            else:
                net = inputs

            end_point = 'Conv2d_1a_3x3'
            net = slim.conv2d(net,
                              depth(32), [3, 3],
                              stride=2,
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 149 x 149 x 32
            end_point = 'Conv2d_2a_3x3'
            net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 32
            end_point = 'Conv2d_2b_3x3'
            net = slim.conv2d(net,
                              depth(64), [3, 3],
                              padding='SAME',
                              scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 147 x 147 x 64
            end_point = 'MaxPool_3a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 64
            end_point = 'Conv2d_3b_1x1'
            net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 73 x 73 x 80.
            end_point = 'Conv2d_4a_3x3'
            net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 71 x 71 x 192.
            end_point = 'MaxPool_5a_3x3'
            net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
            # 35 x 35 x 192.

        # Inception blocks
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # mixed: 35 x 35 x 256.
            end_point = 'Mixed_5b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(32), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_1: 35 x 35 x 288.
            end_point = 'Mixed_5c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0b_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv_1_0c_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5c
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_2: 35 x 35 x 288.
            end_point = 'Mixed_5d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(48), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(64), [5, 5],
                                           scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_5d
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_3: 17 x 17 x 768.
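            # Grid-size reduction: the stride-2, VALID-padded conv and max-pool
            # branches below halve the 35x35 grid to 17x17 before concatenation.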
            end_point = 'Mixed_6a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(384), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(64), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(96), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_1x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6a
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_4: 17 x 17 x 768.
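            # Branch_1 and Branch_2 factorize 7x7 convolutions into cheaper
            # asymmetric 1x7 and 7x1 pairs, as in the Inception V3 paper.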
            end_point = 'Mixed_6b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(128), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(128), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_5: 17 x 17 x 768.
            end_point = 'Mixed_6c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6c
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_6: 17 x 17 x 768.
            end_point = 'Mixed_6d'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(160), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(160), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6d
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_7: 17 x 17 x 768.
            end_point = 'Mixed_6e'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_6e
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_8: 8 x 8 x 1280.
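            # Second grid-size reduction: stride-2, VALID-padded branches take the
            # 17x17 grid down to 8x8.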
            end_point = 'Mixed_7a'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0,
                                           depth(320), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [1, 7],
                                           scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [7, 1],
                                           scope='Conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1,
                                           depth(192), [3, 3],
                                           stride=2,
                                           padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3],
                                               stride=2,
                                               padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_7a
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_9: 8 x 8 x 2048.
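            # Expanded filter bank: Branch_1 and Branch_2 each split into parallel
            # 1x3 and 3x1 convolutions concatenated along the channel axis (NHWC).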
            end_point = 'Mixed_7b'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0b_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points

            # Attention Module after Mixed_7b
            net, end_point = add_attention_layer(attention_module,
                                                 attention_position, net,
                                                 end_point)
            if add_and_check_final(end_point, net):
                return net, end_points

            # mixed_10: 8 x 8 x 2048.
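            # Structurally identical to Mixed_7b (only some inner scope names differ).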
            end_point = 'Mixed_7c'
            with tf.variable_scope(end_point):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net,
                                           depth(320), [1, 1],
                                           scope='Conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net,
                                           depth(384), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_1 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_1,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0b_1x3'),
                                             slim.conv2d(branch_1,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0c_3x1')
                                         ])
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net,
                                           depth(448), [1, 1],
                                           scope='Conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2,
                                           depth(384), [3, 3],
                                           scope='Conv2d_0b_3x3')
                    branch_2 = tf.concat(axis=3,
                                         values=[
                                             slim.conv2d(
                                                 branch_2,
                                                 depth(384), [1, 3],
                                                 scope='Conv2d_0c_1x3'),
                                             slim.conv2d(branch_2,
                                                         depth(384), [3, 1],
                                                         scope='Conv2d_0d_3x1')
                                         ])
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3],
                                               scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3,
                                           depth(192), [1, 1],
                                           scope='Conv2d_0b_1x1')
                net = tf.concat(
                    axis=3, values=[branch_0, branch_1, branch_2, branch_3])
            end_points[end_point] = net
            if end_point == final_endpoint: return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
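
A minimal usage sketch for the base above, assuming the enclosing function is named `inception_v3_attention_base` and that `tf`/`slim` are already imported in this file; the function name and the attention arguments shown are placeholders, so substitute the actual definition from earlier in this file:

# Hypothetical call; the names below are illustrative, not part of the original code.
g = tf.Graph()
with g.as_default():
    images = tf.placeholder(tf.float32, (None, 299, 299, 3), name='input_image')
    net, end_points = inception_v3_attention_base(
        images,
        final_endpoint='Mixed_7c',
        attention_module='se_block',      # placeholder module identifier
        attention_position='Mixed_6e')    # placeholder insertion point
    # `net` is the final 8x8 feature map; `end_points` maps every block name,
    # including the inserted attention layers, to its output tensor.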