# Assumes TF1-style graph mode with the tf_slim package; `vgg_arg_scope(reuse=...)`
# is a helper defined elsewhere in the source module.
import tensorflow.compat.v1 as tf
import tf_slim as slim


def vgg_dual_16(inputs1,
                inputs2,
                num_classes=1000,
                is_training=True,
                dropout_keep_prob=0.5,
                spatial_squeeze=True,
                scope='vgg_16',
                update_top_only=False,
                fc_conv_padding='VALID',
                reuse=False):
  with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs1, inputs2]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      nets = []
      for i, inputs in enumerate([inputs1, inputs2]):
        with slim.arg_scope(vgg_arg_scope(reuse=reuse or (i > 0))):
          net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
          net = slim.max_pool2d(net, [2, 2], scope='pool1')
          net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
          net = slim.max_pool2d(net, [2, 2], scope='pool2')
          net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
          net = slim.max_pool2d(net, [2, 2], scope='pool3')
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
          net = slim.max_pool2d(net, [2, 2], scope='pool4')
          if update_top_only:
            net = tf.stop_gradient(net)
          net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
          nets.append(net)
      with slim.arg_scope(vgg_arg_scope(reuse=reuse)):
        net = tf.concat(nets, 3)
        net = slim.conv2d(net, 512, [1, 1], scope='conv6')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool6')

        net = slim.conv2d(net, 512, [1, 1], scope='conv7')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool7')

        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 2048, [7, 7], padding=fc_conv_padding, scope='fc6_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')

        net = slim.conv2d(net, 2048, [1, 1], scope='fc7_')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout7_')
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='fc8')
        # Convert end_points_collection into a end_point dict.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
          end_points[sc.name + '/fc8'] = net
        return net, end_points
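
A minimal usage sketch for the dual-tower network above (hedged: the 224x224
input size is an assumption, and vgg_arg_scope must be defined as noted at the
top of the file):

inputs1 = tf.placeholder(tf.float32, [None, 224, 224, 3], name='inputs1')
inputs2 = tf.placeholder(tf.float32, [None, 224, 224, 3], name='inputs2')
# The second tower reuses the first tower's conv weights; the concatenated
# features then pass through the fused conv6..fc8 head.
logits, end_points = vgg_dual_16(inputs1, inputs2, num_classes=10)
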
Example No. 2
def define_vggish_slim(training=False):
    """Defines the VGGish TensorFlow model.
  All ops are created in the current default graph, under the scope 'vggish/'.
  The input is a placeholder named 'vggish/input_features' of type float32 and
  shape [batch_size, num_frames, num_bands] where batch_size is variable and
  num_frames and num_bands are constants, and [num_frames, num_bands] represents
  a log-mel-scale spectrogram patch covering num_bands frequency bands and
  num_frames time frames (where each frame step is usually 10ms). This is
  produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET).
  The output is an op named 'vggish/embedding' which produces the activations of
  a 128-D embedding layer, which is usually the penultimate layer when used as
  part of a full model with a final classifier layer.
  Args:
    training: If true, all parameters are marked trainable.
  Returns:
    The op 'vggish/embedding'.
  """
    # Defaults:
    # - All weights are initialized to N(0, INIT_STDDEV).
    # - All biases are initialized to 0.
    # - All activations are ReLU.
    # - All convolutions are 3x3 with stride 1 and SAME padding.
    # - All max-pools are 2x2 with stride 2 and SAME padding.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.truncated_normal_initializer(
                            stddev=params.INIT_STDDEV),
                        biases_initializer=tf.zeros_initializer(),
                        activation_fn=tf.nn.relu,
                        trainable=training), \
         slim.arg_scope([slim.conv2d],
                        kernel_size=[3, 3], stride=1, padding='SAME'), \
         slim.arg_scope([slim.max_pool2d],
                        kernel_size=[2, 2], stride=2, padding='SAME'), \
         tf.variable_scope('vggish'):
        # Input: a batch of 2-D log-mel-spectrogram patches.
        features = tf.placeholder(tf.float32,
                                  shape=(None, params.NUM_FRAMES,
                                         params.NUM_BANDS),
                                  name='input_features')
        # Reshape to 4-D so that we can convolve a batch with conv2d().
        net = tf.reshape(features,
                         [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

        # The VGG stack of alternating convolutions and max-pools.
        net = slim.conv2d(net, 64, scope='conv1')
        net = slim.max_pool2d(net, scope='pool1')
        net = slim.conv2d(net, 128, scope='conv2')
        net = slim.max_pool2d(net, scope='pool2')
        net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
        net = slim.max_pool2d(net, scope='pool3')
        net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
        net = slim.max_pool2d(net, scope='pool4')

        # Flatten before entering fully-connected layers
        net = slim.flatten(net)
        net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
        # The embedding layer.
        net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
        return tf.identity(net, name='embedding')
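
A possible smoke test for define_vggish_slim, assuming TF1 session mode and a
`params` module defining NUM_FRAMES, NUM_BANDS and EMBEDDING_SIZE (a sketch,
not part of the original example):

import numpy as np

embedding = define_vggish_slim(training=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A random log-mel patch just to exercise shapes; real use would restore
    # the pretrained VGGish checkpoint first.
    patch = np.random.rand(1, params.NUM_FRAMES,
                           params.NUM_BANDS).astype(np.float32)
    emb = sess.run(embedding, {'vggish/input_features:0': patch})
    print(emb.shape)  # (1, params.EMBEDDING_SIZE)
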
Example No. 3
    def build_head(self, is_training):

        # Main network
        # Layer 1
        net = slim.repeat(self._image,
                          2,
                          slim.conv2d,
                          64, [3, 3],
                          trainable=False,
                          scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')

        # Layer 2
        net = slim.repeat(net,
                          2,
                          slim.conv2d,
                          128, [3, 3],
                          trainable=False,
                          scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')

        # Layer 3
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          256, [3, 3],
                          trainable=is_training,
                          scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')

        # Layer 4
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          512, [3, 3],
                          trainable=is_training,
                          scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')

        # Layer 5
        net = slim.repeat(net,
                          3,
                          slim.conv2d,
                          512, [3, 3],
                          trainable=is_training,
                          scope='conv5')

        # Append network to summaries
        self._act_summaries.append(net)

        # Append network as head layer
        self._layers['head'] = net

        return net
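
build_head freezes conv1/conv2 with trainable=False while conv3..conv5 follow
is_training; a standalone sketch of that freezing pattern (names and shapes
are illustrative, not taken from the class above):

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
frozen = slim.conv2d(images, 64, [3, 3], trainable=False, scope='frozen_conv')
tuned = slim.conv2d(frozen, 64, [3, 3], trainable=True, scope='tuned_conv')
# Only 'tuned_conv' variables end up in the default optimization var list:
print([v.op.name for v in tf.trainable_variables()])
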
Example No. 4
def vgg_19(inputs,
           num_classes=1000,
           is_training=False,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           reuse=False,
           fc_conv_padding='VALID'):
  """Oxford Net VGG 19-Layers version E Example.
  Note: This variant stops at pool5; the fully connected layers are omitted.
        The original VGG weights expect 224x224 inputs.
  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output. Otherwise,
      the output prediction map will be (input / 32) - 6 in case of 'VALID' padding.
  Returns:
    the last op (the pool5 feature map; this variant omits the logits layers)
    and the end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5', reuse=reuse)
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Convert end_points_collection into an end_points dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)

      return net, end_points
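
Since this vgg_19 variant stops at pool5, it serves as a plain feature
extractor; a hedged sketch (the input size is an assumption):

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
features, end_points = vgg_19(images, is_training=False)
# For 224x224 inputs, `features` is the pool5 map of shape [None, 7, 7, 512].
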
Example No. 5
  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the Inception ResNet v2
    network after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name.

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    with tf.variable_scope('InceptionResnetV2', reuse=self._reuse_weights):
      with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
          weight_decay=self._weight_decay)):
        # Batch norm runs in training mode only if self._train_batch_norm is True.
        with slim.arg_scope([slim.batch_norm],
                            is_training=self._train_batch_norm):
          with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                              stride=1, padding='SAME'):
            with tf.variable_scope('Mixed_7a'):
              with tf.variable_scope('Branch_0'):
                tower_conv = slim.conv2d(proposal_feature_maps,
                                         256, 1, scope='Conv2d_0a_1x1')
                tower_conv_1 = slim.conv2d(
                    tower_conv, 384, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_1'):
                tower_conv1 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv1_1 = slim.conv2d(
                    tower_conv1, 288, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_2'):
                tower_conv2 = slim.conv2d(
                    proposal_feature_maps, 256, 1, scope='Conv2d_0a_1x1')
                tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                            scope='Conv2d_0b_3x3')
                tower_conv2_2 = slim.conv2d(
                    tower_conv2_1, 320, 3, stride=2,
                    padding='VALID', scope='Conv2d_1a_3x3')
              with tf.variable_scope('Branch_3'):
                tower_pool = slim.max_pool2d(
                    proposal_feature_maps, 3, stride=2, padding='VALID',
                    scope='MaxPool_1a_3x3')
              net = tf.concat(
                  [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
            net = slim.repeat(net, 9, inception_resnet_v2.block8, scale=0.20)
            net = inception_resnet_v2.block8(net, activation_fn=None)
            proposal_classifier_features = slim.conv2d(
                net, 1536, 1, scope='Conv2d_7b_1x1')
        return proposal_classifier_features
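
The Mixed_7a block above is the usual Inception branch-and-concat pattern; a
minimal standalone sketch of that pattern (the sizes are illustrative):

feature_map = tf.placeholder(tf.float32, [None, 17, 17, 1088])
with slim.arg_scope([slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
  branch_0 = slim.conv2d(feature_map, 256, 1, scope='demo_b0')
  branch_1 = slim.max_pool2d(feature_map, 3, scope='demo_b1')
  merged = tf.concat([branch_0, branch_1], 3)  # channels: 256 + 1088
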
Example No. 6
def vgg_16(inputs, scope='vgg_16'):
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d]):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')

    return net
Example No. 7
def vgg_16_base(inputs,
                is_training=True,
                scope='vgg_16',
                fc_conv_padding='VALID',
                final_endpoint=None):
    """
    VGG16模型
    :param inputs:a tensor [batch_size, height, width, channels]
    :param num_classes:分类数
    :param is_training: 是否训练
    :param dropout_keep_prob: 训练时dropout保持激活的可能性
    :param spatial_squeeze:是否压缩输出的空间维度
    :param scope:变量的可选范围
    :param fc_conv_padding: 全连接层的填充类型 'SAME' or 'VALID'
    :param global_pool: a boolean flag .True: 则对分类模块的输入需用平均池化
    :return: net: VGG net
             end_points :a dict of tensors with intermediate activations.
    """
    end_points = {}
    with tf.compat.v1.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')

            end_point = 'pool4'
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')

            end_point = 'conv5_2'
            end_points[end_point] = net
            if end_point == final_endpoint:
                return net, end_points

            net = slim.repeat(net,
                              1,
                              slim.conv2d,
                              512, [3, 3],
                              scope='conv5_3')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

        return net, end_points
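
The final_endpoint argument lets callers truncate the backbone early; for
instance (a sketch assuming 224x224 inputs):

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = vgg_16_base(images, final_endpoint='pool4')
# Construction stops right after pool4, so conv5/pool5 are never added.
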
Example No. 8
def create_network(inputs, is_training, scope="win19_dep9", reuse=False):
	num_maps = 64
	kw = 3
	kh = 3

	with tf.variable_scope(scope, reuse=reuse):
		with slim.arg_scope([slim.conv2d], padding='VALID', activation_fn=tf.nn.relu, 
			normalizer_fn=slim.batch_norm, normalizer_params={'is_training': is_training}):
			
			net = slim.conv2d(inputs, num_maps, [kh, kw], scope='conv_bn_relu1')
			net = slim.repeat(net, 7, slim.conv2d, num_maps, [kh, kw], scope='conv_bn_relu2_8')
			net = slim.conv2d(net, num_maps, [kh, kw], scope='conv9', activation_fn=None, 
					normalizer_fn=None)
			net = slim.batch_norm(net, is_training=is_training)	

	return net
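
Nine VALID 3x3 convolutions consume a 19x19 window exactly, matching the
'win19_dep9' scope name; a hedged shape check (the 19x19x1 input shape is an
assumption inferred from that name):

patches = tf.placeholder(tf.float32, [None, 19, 19, 1])
out = create_network(patches, is_training=False)
print(out.get_shape())  # each VALID 3x3 conv trims 2 pixels: 19 -> 1
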
Example No. 9
    def call(self, inputs, step_type=None, network_states=()):
        del step_type  # unused.

        states, goals = inputs

        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=NORMALIZER_FN,
                            normalizer_params=NORMALIZER_PARAMS):

            with tf.compat.v1.variable_scope('encode_dynamics'):
                positions = states['position']
                next_positions = goals['position']
                delta_positions = tf.subtract(next_positions, positions,
                                              'delta_positions')
                net = tf.concat([positions, delta_positions], axis=-1)
                net = slim.repeat(net,
                                  2,
                                  slim.fully_connected,
                                  self._dim_fc_state,
                                  scope='fc')
                dynamics_feats = tf.identity(net, 'dynamics_feats')

                with tf.compat.v1.variable_scope('global_pool'):
                    dynamics_feats = global_pool(dynamics_feats,
                                                 axis=1,
                                                 mask=states['body_mask'],
                                                 mode='reduce_sum')

            with tf.compat.v1.variable_scope('inference'):
                net = dynamics_feats
                net = slim.fully_connected(net, self._dim_fc_state, scope='fc')
                gaussian_params = slim.fully_connected(net,
                                                       2 * self._dim_c,
                                                       activation_fn=None,
                                                       normalizer_fn=None,
                                                       scope='gaussian_params')
                c_means = tf.identity(gaussian_params[..., :self._dim_c],
                                      name='c_means')
                c_stddevs = tf.add(tf.nn.softplus(
                    gaussian_params[..., self._dim_c:]),
                                   1e-6,
                                   name='c_stddevs')

        return c_means, c_stddevs
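
The stddev head above applies softplus plus a small epsilon so the predicted
standard deviations stay strictly positive; in isolation:

raw = tf.constant([-5.0, 0.0, 5.0])
stddevs = tf.add(tf.nn.softplus(raw), 1e-6)  # always > 0, smooth near zero
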
Example No. 10
def _decoder_with_concat_merge(decoder_features_list,
                               decoder_depth,
                               decoder_use_separable_conv=True,
                               weight_decay=0.0001,
                               scope_suffix=''):
    """Decoder with concatenation to merge features.

  This decoder method applies two convolutions to smooth the features obtained
  by concatenating the input decoder_features_list.

  This decoder module is proposed in the DeepLabv3+ paper.

  Args:
    decoder_features_list: A list of decoder features.
    decoder_depth: Integer, the filters used in the convolution.
    decoder_use_separable_conv: Boolean, use separable conv or not.
    weight_decay: Weight decay for the model variables.
    scope_suffix: String, used in the scope suffix.

  Returns:
    decoder features merged with concatenation.
  """
    if decoder_use_separable_conv:
        decoder_features = split_separable_conv2d(
            tf.concat(decoder_features_list, 3),
            filters=decoder_depth,
            rate=1,
            weight_decay=weight_decay,
            scope='decoder_conv0' + scope_suffix)
        decoder_features = split_separable_conv2d(decoder_features,
                                                  filters=decoder_depth,
                                                  rate=1,
                                                  weight_decay=weight_decay,
                                                  scope='decoder_conv1' +
                                                  scope_suffix)
    else:
        num_convs = 2
        decoder_features = slim.repeat(tf.concat(decoder_features_list, 3),
                                       num_convs,
                                       slim.conv2d,
                                       decoder_depth,
                                       3,
                                       scope='decoder_conv' + scope_suffix)
    return decoder_features
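
In the non-separable branch, slim.repeat is shorthand for stacking identical
layers; scope names aside, the two snippets below build the same computation
(sizes are illustrative):

feats = tf.placeholder(tf.float32, [None, 65, 65, 304])
a = slim.repeat(feats, 2, slim.conv2d, 256, 3, scope='dec')
# ...is the same stack as:
b = slim.conv2d(feats, 256, 3, scope='dec_manual_1')
b = slim.conv2d(b, 256, 3, scope='dec_manual_2')
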
Example No. 11
def encode_relation(positions, body_masks, dim_fc_state):
    """Encode the relation feature.

    Args:
        positions: Positions of the bodies.
        body_masks: Masks of valid bodies.
        dim_fc_state: Dimension of state encoding.

    Returns:
        A tensor of shape [batch_size, num_bodies, dim_fc_state].
    """
    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        normalizer_fn=NORMALIZER_FN,
                        normalizer_params=NORMALIZER_PARAMS):

        with tf.compat.v1.variable_scope('relation_masks'):
            body_masks = tf.identity(body_masks, 'body_masks')
            relation_masks = tf.subtract(
                tf.multiply(tf.expand_dims(body_masks, -1),
                            tf.expand_dims(body_masks, -2)),
                tf.linalg.diag(body_masks))
            relation_masks = tf.expand_dims(relation_masks, axis=-1)

        with tf.compat.v1.variable_scope('relation_feats'):
            net = tf.subtract(tf.expand_dims(positions, axis=1),
                              tf.expand_dims(positions, axis=2))
            net = slim.repeat(net,
                              2,
                              slim.fully_connected,
                              dim_fc_state,
                              scope='fc')
            relation_feats = net

    return tf.reduce_sum(relation_feats * relation_masks,
                         axis=1,
                         name='sum_relation_feats')
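
The relation mask above is the outer product of the body mask with itself
minus its diagonal, so only pairs of distinct valid bodies contribute; a tiny
numeric check (illustrative):

masks = tf.constant([[1.0, 1.0, 0.0]])  # one sample, bodies 0 and 1 valid
pairwise = tf.expand_dims(masks, -1) * tf.expand_dims(masks, -2)
relation = pairwise - tf.linalg.diag(masks)
# relation[0] == [[0, 1, 0], [1, 0, 0], [0, 0, 0]]
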
Example No. 12
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a',
          fc_conv_padding='VALID',
          global_pool=False):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer are returned instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not to squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.
    global_pool: Optional boolean flag. If True, the input to the classification
      layer is avgpooled to size 1x1, for any input size. (This is not part
      of the original VGG architecture.)

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the input to the logits layer (if num_classes is 0 or None).
    end_points: a dict of tensors with intermediate activations.
  """
  with tf.compat.v1.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')

      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if global_pool:
        net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
        end_points['global_pool'] = net
      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc8')
        if spatial_squeeze:
          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
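
With global_pool=True the classifier head works for arbitrary input sizes; a
hedged fully convolutional sketch:

images = tf.placeholder(tf.float32, [None, None, None, 3])
logits, end_points = vgg_a(images, num_classes=1000, is_training=False,
                           global_pool=True)
# Global average pooling reduces the fc7 output to 1x1, so the final squeeze
# holds regardless of the original image size.
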
Example No. 13
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse, 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net,
                                  256,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
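
Typical embedding-style usage of the model above, assuming 299x299 inputs to
match the 149x149 activation comments (FaceNet-style pipelines usually
L2-normalize the bottleneck; both points are assumptions here):

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
prelogits, end_points = inception_resnet_v1(images, is_training=False)
embeddings = tf.nn.l2_normalize(prelogits, 1, name='embeddings')
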
Example No. 14
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse, 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 96,
                                                 1,
                                                 scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    48,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    64,
                                                    5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net,
                                                    64,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net,
                                                     3,
                                                     stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool,
                                                   64,
                                                   1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([
                        tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1
                    ], 3)

                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1088
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 384,
                                                 3,
                                                 stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    256,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    256,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                                    384,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 256,
                                                 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv,
                                                   384,
                                                   3,
                                                   stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1,
                                                    288,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2,
                                                    288,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    320,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([
                        tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool
                    ], 3)

                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
Example No. 15
def vgg_16(inputs, reuse=False, pooling='avg', final_endpoint='fc8'):
    """VGG-16 implementation intended for test-time use.

  It takes inputs with values in [0, 1] and preprocesses them (scaling,
  mean-centering) before feeding them to the VGG-16 network.

  Args:
    inputs: A 4-D tensor of shape [batch_size, image_size, image_size, 3]
        and dtype float32, with values in [0, 1].
    reuse: bool. Whether to reuse model parameters. Defaults to False.
    pooling: str in {'avg', 'max'}, which pooling operation to use. Defaults
        to 'avg'.
    final_endpoint: str, specifies the endpoint to construct the network up to.
        Defaults to 'fc8'.

  Returns:
    A dict mapping end-point names to their corresponding Tensor.

  Raises:
    ValueError: the final_endpoint argument is not recognized.
  """
    inputs *= 255.0
    inputs -= tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)

    pooling_fns = {'avg': slim.avg_pool2d, 'max': slim.max_pool2d}
    pooling_fn = pooling_fns[pooling]

    with tf.variable_scope('vgg_16', [inputs], reuse=reuse) as sc:
        end_points = {}

        def add_and_check_is_final(layer_name, net):
            end_points['%s/%s' % (sc.name, layer_name)] = net
            return layer_name == final_endpoint

        with slim.arg_scope([slim.conv2d], trainable=False):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            if add_and_check_is_final('conv1', net): return end_points
            net = pooling_fn(net, [2, 2], scope='pool1')
            if add_and_check_is_final('pool1', net): return end_points
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            if add_and_check_is_final('conv2', net): return end_points
            net = pooling_fn(net, [2, 2], scope='pool2')
            if add_and_check_is_final('pool2', net): return end_points
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            if add_and_check_is_final('conv3', net): return end_points
            net = pooling_fn(net, [2, 2], scope='pool3')
            if add_and_check_is_final('pool3', net): return end_points
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            if add_and_check_is_final('conv4', net): return end_points
            net = pooling_fn(net, [2, 2], scope='pool4')
            if add_and_check_is_final('pool4', net): return end_points
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            if add_and_check_is_final('conv5', net): return end_points
            net = pooling_fn(net, [2, 2], scope='pool5')
            if add_and_check_is_final('pool5', net): return end_points
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            if add_and_check_is_final('fc6', net): return end_points
            net = slim.dropout(net, 0.5, is_training=False, scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            if add_and_check_is_final('fc7', net): return end_points
            net = slim.dropout(net, 0.5, is_training=False, scope='dropout7')
            net = slim.conv2d(net,
                              1000, [1, 1],
                              activation_fn=None,
                              scope='fc8')
            end_points[sc.name + '/predictions'] = slim.softmax(net)
            if add_and_check_is_final('fc8', net): return end_points

        raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
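
Because this variant takes inputs in [0, 1] and can stop early, it suits
perceptual-feature extraction; a hedged sketch pulling conv3 activations:

images = tf.placeholder(tf.float32, [None, 256, 256, 3])  # values in [0, 1]
end_points = vgg_16(images, final_endpoint='conv3')
conv3 = end_points['vgg_16/conv3']
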
Example No. 16
def inception_resnet_v2_base(inputs,
                             final_endpoint='Conv2d_7b_1x1',
                             output_stride=16,
                             align_feature_maps=False,
                             scope=None,
                             activation_fn=tf.nn.relu):
    """Inception model from  http://arxiv.org/abs/1602.07261.

  Constructs an Inception Resnet v2 network from inputs to the given final
  endpoint. This method can construct the network up to the final inception
  block Conv2d_7b_1x1.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
    output_stride: A scalar that specifies the requested ratio of input to
      output spatial resolution. Only supports 8 and 16.
    align_feature_maps: When true, changes all the VALID paddings in the network
      to SAME padding so that the feature maps are aligned.
    scope: Optional variable_scope.
    activation_fn: Activation function for block scopes.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or if the output_stride is not 8 or 16, or if the output_stride is 8 and
      we request an end point after 'PreAuxLogits'.
  """
    if output_stride != 8 and output_stride != 16:
        raise ValueError('output_stride must be 8 or 16.')

    padding = 'SAME' if align_feature_maps else 'VALID'

    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1,
                            padding='SAME'):
            # 149 x 149 x 32
            net = slim.conv2d(inputs,
                              32,
                              3,
                              stride=2,
                              padding=padding,
                              scope='Conv2d_1a_3x3')
            if add_and_check_final('Conv2d_1a_3x3', net):
                return net, end_points

            # 147 x 147 x 32
            net = slim.conv2d(net,
                              32,
                              3,
                              padding=padding,
                              scope='Conv2d_2a_3x3')
            if add_and_check_final('Conv2d_2a_3x3', net):
                return net, end_points
            # 147 x 147 x 64
            net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
            if add_and_check_final('Conv2d_2b_3x3', net):
                return net, end_points
            # 73 x 73 x 64
            net = slim.max_pool2d(net,
                                  3,
                                  stride=2,
                                  padding=padding,
                                  scope='MaxPool_3a_3x3')
            if add_and_check_final('MaxPool_3a_3x3', net):
                return net, end_points
            # 73 x 73 x 80
            net = slim.conv2d(net,
                              80,
                              1,
                              padding=padding,
                              scope='Conv2d_3b_1x1')
            if add_and_check_final('Conv2d_3b_1x1', net):
                return net, end_points
            # 71 x 71 x 192
            net = slim.conv2d(net,
                              192,
                              3,
                              padding=padding,
                              scope='Conv2d_4a_3x3')
            if add_and_check_final('Conv2d_4a_3x3', net):
                return net, end_points
            # 35 x 35 x 192
            net = slim.max_pool2d(net,
                                  3,
                                  stride=2,
                                  padding=padding,
                                  scope='MaxPool_5a_3x3')
            if add_and_check_final('MaxPool_5a_3x3', net):
                return net, end_points

            # 35 x 35 x 320
            with tf.variable_scope('Mixed_5b'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net,
                                                48,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                64,
                                                5,
                                                scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    tower_conv2_0 = slim.conv2d(net,
                                                64,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2_0,
                                                96,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                96,
                                                3,
                                                scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.avg_pool2d(net,
                                                 3,
                                                 stride=1,
                                                 padding='SAME',
                                                 scope='AvgPool_0a_3x3')
                    tower_pool_1 = slim.conv2d(tower_pool,
                                               64,
                                               1,
                                               scope='Conv2d_0b_1x1')
                net = tf.concat(
                    [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1],
                    3)

            if add_and_check_final('Mixed_5b', net): return net, end_points
            # TODO(alemi): Register intermediate endpoints
            net = slim.repeat(net,
                              10,
                              block35,
                              scale=0.17,
                              activation_fn=activation_fn)

            # 17 x 17 x 1088 if output_stride == 8,
            # 33 x 33 x 1088 if output_stride == 16
            use_atrous = output_stride == 8

            with tf.variable_scope('Mixed_6a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net,
                                             384,
                                             3,
                                             stride=1 if use_atrous else 2,
                                             padding=padding,
                                             scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net,
                                                256,
                                                1,
                                                scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                256,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                                384,
                                                3,
                                                stride=1 if use_atrous else 2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_pool = slim.max_pool2d(net,
                                                 3,
                                                 stride=1 if use_atrous else 2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

            if add_and_check_final('Mixed_6a', net): return net, end_points

            # TODO(alemi): register intermediate endpoints
            with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
                net = slim.repeat(net,
                                  20,
                                  block17,
                                  scale=0.10,
                                  activation_fn=activation_fn)
            if add_and_check_final('PreAuxLogits', net): return net, end_points

            if output_stride == 8:
                # TODO(gpapan): Properly support output_stride for the rest of the net.
                raise ValueError(
                    'output_stride==8 is only supported up to the '
                    'PreAuxLogits end_point for now.')

            # 8 x 8 x 2080
            with tf.variable_scope('Mixed_7a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net,
                                             256,
                                             1,
                                             scope='Conv2d_0a_1x1')
                    tower_conv_1 = slim.conv2d(tower_conv,
                                               384,
                                               3,
                                               stride=2,
                                               padding=padding,
                                               scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1 = slim.conv2d(net,
                                              256,
                                              1,
                                              scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1,
                                                288,
                                                3,
                                                stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_conv2 = slim.conv2d(net,
                                              256,
                                              1,
                                              scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2,
                                                288,
                                                3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                320,
                                                3,
                                                stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.max_pool2d(net,
                                                 3,
                                                 stride=2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat(
                    [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool],
                    3)

            if add_and_check_final('Mixed_7a', net): return net, end_points

            # TODO(alemi): register intermediate endpoints
            net = slim.repeat(net,
                              9,
                              block8,
                              scale=0.20,
                              activation_fn=activation_fn)
            net = block8(net, activation_fn=None)

            # 8 x 8 x 1536
            net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
            if add_and_check_final('Conv2d_7b_1x1', net):
                return net, end_points

        raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
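
A minimal call sketch for the base network above. This is hedged: the body
matches TF-slim's inception_resnet_v2_base, so the function name, its
final_endpoint and output_stride parameters, and the 299x299 input size are
assumptions, not something shown in this listing.

# Hypothetical usage, assuming the enclosing function is
# inception_resnet_v2_base(inputs, final_endpoint=..., output_stride=...).
import tensorflow.compat.v1 as tf

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
# Stop at PreAuxLogits; output_stride=8 keeps a denser feature map by
# switching Mixed_6a and the block17 repeats to atrous convolution.
net, end_points = inception_resnet_v2_base(
    images, final_endpoint='PreAuxLogits', output_stride=8)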
Example #17
def vgg_16_hed_cam(
    inputs,
    cams,
    num_classes=1,
    is_training=True,
    add_v1net_early=False,
    add_v1net=False,
    reuse=None,
    reduce_conv=True,
    scope='vgg_16',
):
    """VGG-16 implementation of HED.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    is_training: whether or not the model is being trained.
    add_v1net: whether to add v1net blocks after convolutions.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional scope for the variables.
  Returns:
    side_outputs_fullres: list of side output logits resized to input resolution.
    end_points: a dict of tensors with intermediate activations.
  """
    side_outputs = []
    _, h, w, _ = inputs.shape.as_list()
    with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            with tf.variable_scope("cam-conv1"):
                cam_net = slim.repeat(cams,
                                      1,
                                      slim.conv2d,
                                      64, [3, 3],
                                      scope="cam-conv1")
                net = net + cam_net

            if add_v1net_early and FLAGS.v1_timesteps:
                with tf.variable_scope("v1net-conv1"):
                    v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 64
                    net = build_v1net(inputs=net,
                                      filters=n_filters,
                                      timesteps=v1_timesteps,
                                      kernel_size=v1_kernel_size,
                                      is_training=is_training)
            side_outputs.append(net)
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool1')

            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            with tf.variable_scope("cam-conv2"):
                cam_net = slim.repeat(cam_net,
                                      1,
                                      slim.conv2d,
                                      128, [3, 3],
                                      scope="cam-conv2")
                net = net + cam_net

            if add_v1net and FLAGS.v1_timesteps:
                with tf.variable_scope("v1net-conv2"):
                    v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 128
                    net = build_v1net(inputs=net,
                                      filters=n_filters,
                                      timesteps=v1_timesteps,
                                      kernel_size=v1_kernel_size,
                                      is_training=is_training)
            side_outputs.append(net)
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool2')

            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            with tf.variable_scope("cam-conv3"):
                cam_net = slim.repeat(cam_net,
                                      1,
                                      slim.conv2d,
                                      256, [3, 3],
                                      scope="cam-conv3")
                net = net + cam_net

            if add_v1net and FLAGS.v1_timesteps:
                with tf.variable_scope("v1net-conv3"):
                    v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 256
                    net = build_v1net(inputs=net,
                                      filters=n_filters,
                                      timesteps=v1_timesteps,
                                      kernel_size=v1_kernel_size,
                                      is_training=is_training)
            side_outputs.append(net)
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool3')

            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            with tf.variable_scope("cam-conv4"):
                cam_net = slim.repeat(cam_net,
                                      1,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope="cam-conv4")
                net = net + cam_net

            if add_v1net and FLAGS.v1_timesteps:
                with tf.variable_scope("v1net-conv4"):
                    v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 512
                    net = build_v1net(inputs=net,
                                      filters=n_filters,
                                      timesteps=v1_timesteps,
                                      kernel_size=v1_kernel_size,
                                      is_training=is_training)
            side_outputs.append(net)
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            cam_net = slim.max_pool2d(cam_net, [2, 2], scope='cam_pool4')

            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            with tf.variable_scope("cam-conv5"):
                cam_net = slim.repeat(cam_net,
                                      1,
                                      slim.conv2d,
                                      512, [3, 3],
                                      scope="cam-conv5")
                net = net + cam_net

            if add_v1net and FLAGS.v1_timesteps:
                with tf.variable_scope("v1net-conv5"):
                    v1_timesteps, v1_kernel_size, n_filters = FLAGS.v1_timesteps, 3, 512
                    net = build_v1net(inputs=net,
                                      filters=n_filters,
                                      timesteps=v1_timesteps,
                                      kernel_size=v1_kernel_size,
                                      is_training=is_training)
            side_outputs.append(net)
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            # The first side output is already at input resolution; resize the
            # rest to [h, w] before classification.
            side_outputs_fullres = [side_outputs[0]] + [
                tf.image.resize_bilinear(side_output, [h, w])
                for side_output in side_outputs[1:]
            ]
            with tf.variable_scope("side_output_classifiers", reuse=reuse):
                side_outputs_fullres = [
                    slim.conv2d(
                        side_output,
                        1,
                        [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                    ) for side_output in side_outputs_fullres
                ]
            side_outputs_fullres = tf.stack(side_outputs_fullres, axis=0)
            if reduce_conv:
                with tf.variable_scope("side_output_fusion"):
                    side_outputs_ = tf.transpose(side_outputs_fullres,
                                                 (1, 2, 3, 4, 0))
                    side_outputs_ = tf.squeeze(side_outputs_, axis=3)
                    fused_predictions = fuse_predictions(side_outputs_)
            else:
                fused_predictions = tf.reduce_mean(side_outputs_fullres,
                                                   axis=0)
            end_points['fused_predictions'] = fused_predictions
            side_outputs_fullres = tf.reshape(side_outputs_fullres,
                                              (-1, h, w, 1))
            end_points['side_outputs_fullres'] = side_outputs_fullres
            return fused_predictions, end_points
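
A hedged usage sketch for vgg_16_hed_cam. The input sizes are assumptions;
static height/width are required because the function reads
inputs.shape.as_list() to size the bilinear resize, and cams must match the
spatial size of inputs so the CAM branch can be added to each conv block.
reduce_conv=False is used so the sketch does not depend on the external
fuse_predictions helper.

import tensorflow.compat.v1 as tf

images = tf.placeholder(tf.float32, [None, 320, 320, 3])
cams = tf.placeholder(tf.float32, [None, 320, 320, 1])
fused, end_points = vgg_16_hed_cam(images, cams, is_training=True,
                                   reduce_conv=False)
edge_probs = tf.nn.sigmoid(fused)  # logits -> per-pixel edge probabilities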
Example #18
def vgg_16_hed(
    inputs,
    cams=None,
    num_classes=1,
    is_training=True,
    add_v1net_early=False,
    add_v1net=False,
    reuse=None,
    reduce_conv=True,
    scope='vgg_16',
):
    """VGG-16 implementation of HED.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    is_training: whether or not the model is being trained.
    add_v1net: whether to add v1net blocks after convolutions.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional scope for the variables.
  Returns:
    side_outputs_fullres: list of side output logits resized to input resolution.
    end_points: a dict of tensors with intermediate activations.
  """
    del cams  # unused here
    side_outputs = []
    _, h, w, _ = inputs.shape.as_list()
    with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.max_pool2d],
                outputs_collections=end_points_collection,
        ):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = add_v1net_layer(net, is_training, add_v1net_early, 1)
            net = tf.layers.batch_normalization(net, training=is_training)
            with tf.variable_scope("dsn_convolution_1"):
                dsn_1 = slim.conv2d(
                    net,
                    1,
                    [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                )
            side_outputs.append(dsn_1)
            net = slim.max_pool2d(net, [2, 2], scope='pool1')

            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = add_v1net_layer(net, is_training, add_v1net, 2)
            net = tf.layers.batch_normalization(net, training=is_training)
            with tf.variable_scope("dsn_convolution_2"):
                # TODO(vveeraba): Replace following with deconvolution
                dsn_2 = resize_and_crop(
                    slim.conv2d(
                        net,
                        1,
                        [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                    ), 2, h, w)
            side_outputs.append(dsn_2)
            net = slim.max_pool2d(net, [2, 2], scope='pool2')

            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = add_v1net_layer(net, is_training, add_v1net, 3)
            net = tf.layers.batch_normalization(net, training=is_training)
            with tf.variable_scope("dsn_convolution_3"):
                dsn_3 = resize_and_crop(
                    slim.conv2d(
                        net,
                        1,
                        [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                    ), 4, h, w)
            side_outputs.append(dsn_3)
            net = slim.max_pool2d(net, [2, 2], scope='pool3')

            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = add_v1net_layer(net, is_training, add_v1net, 4)
            net = tf.layers.batch_normalization(net, training=is_training)
            with tf.variable_scope("dsn_convolution_4"):
                dsn_4 = resize_and_crop(
                    slim.conv2d(
                        net,
                        1,
                        [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                    ), 8, h, w)
            side_outputs.append(dsn_4)
            net = slim.max_pool2d(net, [2, 2], scope='pool4')

            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = add_v1net_layer(net, is_training, add_v1net, 5)
            net = tf.layers.batch_normalization(net, training=is_training)
            with tf.variable_scope("dsn_convolution_5"):
                dsn_5 = resize_and_crop(
                    slim.conv2d(
                        net,
                        1,
                        [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                    ), 16, h, w)
            side_outputs.append(dsn_5)
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            side_outputs = tf.stack(side_outputs, axis=0)

            with tf.variable_scope("side_output_fusion"):
                side_outputs_ = tf.squeeze(tf.transpose(
                    side_outputs, (1, 2, 3, 4, 0)),
                                           axis=3)
                fused_predictions = slim.conv2d(
                    side_outputs_,
                    1,
                    [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    weights_initializer=tf.constant_initializer(0.2),
                )
            end_points['fused_predictions'] = fused_predictions
            side_outputs_fullres = tf.reshape(side_outputs, (-1, h, w, 1))
            end_points['side_outputs_fullres'] = side_outputs_fullres
            return fused_predictions, end_points
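
Both HED variants return per-pixel logits, so training typically applies a
class-balanced sigmoid cross-entropy to the fused map and to each side
output. A minimal sketch follows; the balancing weight is the standard HED
recipe and is not taken from this listing.

def hed_loss(fused_logits, side_logits, labels):
  # labels: float edge map in [0, 1], shape [batch, h, w, 1].
  # side_logits: iterable of [batch, h, w, 1] logit maps, e.g. from
  # splitting end_points['side_outputs_fullres'] back into 5 maps.
  beta = tf.reduce_mean(1.0 - labels)  # fraction of non-edge pixels
  weights = labels * beta + (1.0 - labels) * (1.0 - beta)

  def weighted_bce(logits):
    bce = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                  logits=logits)
    return tf.reduce_mean(weights * bce)

  loss = weighted_bce(fused_logits)
  for side in side_logits:
    loss += weighted_bce(side)
  return loss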
Example #19
def vgg_16_fcn8s(inputs,
                 num_classes=19,
                 is_training=True,
                 dropout_keep_prob=0.5,
                 scope='vgg_16_fcn8s'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    net = inputs
    with ExitStack() as cm:
        cm.enter_context(slim.arg_scope(vgg_arg_scope()))
        sc = cm.enter_context(tf.variable_scope(scope, 'vgg_16', [inputs]))
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        cm.enter_context(
            slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection))
        #net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = tf.pad(net, [[0, 0], [100, 100], [100, 100], [0, 0]])
        net = slim.conv2d(net, 64, 3, padding='VALID', scope='conv1/conv1_1')
        net = slim.conv2d(net, 64, 3, scope='conv1/conv1_2')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = pool3 = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = pool4 = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
        net = slim.dropout(net,
                           dropout_keep_prob,
                           is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net,
                           dropout_keep_prob,
                           is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net,
                          num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          weights_initializer=tf.zeros_initializer(),
                          scope='fc8')
        upscore2a = upscale(net, 2, name='upscore2a')
        tf.add_to_collection(end_points_collection, upscore2a)
        score_pool4 = slim.conv2d(pool4 * 0.01,
                                  num_classes,
                                  1,
                                  activation_fn=None,
                                  weights_initializer=tf.zeros_initializer(),
                                  scope='score_pool4')
        score_pool4c = crop(score_pool4, upscore2a, 5, name='score_pool4c')
        tf.add_to_collection(end_points_collection, score_pool4c)
        fuse_pool4 = tf.add(upscore2a, score_pool4c, name='fuse_pool4')
        tf.add_to_collection(end_points_collection, fuse_pool4)
        upscore_pool4a = upscale(fuse_pool4, 2, name='upscore_pool4a')
        tf.add_to_collection(end_points_collection, upscore_pool4a)
        score_pool3 = slim.conv2d(pool3 * 0.0001,
                                  num_classes,
                                  1,
                                  activation_fn=None,
                                  weights_initializer=tf.zeros_initializer(),
                                  scope='score_pool3')
        score_pool3c = crop(score_pool3,
                            upscore_pool4a,
                            9,
                            name='score_pool3c')
        tf.add_to_collection(end_points_collection, score_pool3c)
        fuse_pool3 = tf.add(upscore_pool4a, score_pool3c, name='fuse_pool3')
        tf.add_to_collection(end_points_collection, fuse_pool3)
        upscore8a = upscale(fuse_pool3, 8, name='upscore8a')
        tf.add_to_collection(end_points_collection, upscore8a)
        net = score = crop(upscore8a, inputs, 31, name='score')
        tf.add_to_collection(end_points_collection, score)
        # Convert end_points_collection into an end_point dict.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        return net, end_points
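
A hedged usage sketch for vgg_16_fcn8s. It assumes ExitStack comes from
contextlib and that the vgg_arg_scope, upscale, and crop helpers referenced
above are in scope; the 100-pixel padding plus the crop offsets (5, 9, 31)
follow the original Caffe FCN-8s alignment, so the logits come back at the
input resolution.

import tensorflow.compat.v1 as tf

images = tf.placeholder(tf.float32, [1, 512, 512, 3])
logits, end_points = vgg_16_fcn8s(images, num_classes=19)
predictions = tf.argmax(logits, axis=-1)  # per-pixel class ids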
Example #20
def encode_effect(states, contexts, use_relation, use_point_cloud,
                  dim_fc_state, dim_fc_context):
    """Encode the effect feature.

    Args:
        states: The state as a dict.
        contexts: The context data. Set to None if no contexts are used.
        use_relation: True if use relation encoding.
        use_point_cloud: True if point cloud data is used.
        dim_fc_state: Dimension of state encoding.
        dim_fc_context: Dimension of context encoding.

    Returns:
        A tensor of shape [batch_size, dim_fc_state].
    """
    positions = states['position']
    body_masks = states['body_mask']
    num_bodies = int(body_masks.shape[-1])

    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        normalizer_fn=NORMALIZER_FN,
                        normalizer_params=NORMALIZER_PARAMS):

        features = []

        with tf.compat.v1.variable_scope('encode_position'):
            position_feats = slim.fully_connected(positions,
                                                  dim_fc_state,
                                                  scope='fc')
            features.append(position_feats)

        if use_relation:
            with tf.compat.v1.variable_scope('encode_relation'):
                relation_feats = encode_relation(positions,
                                                 body_masks,
                                                 dim_fc_state=dim_fc_state)
                features.append(relation_feats)

        if use_point_cloud:
            cloud_feats = states['cloud_feat']
            features.append(cloud_feats)

        if contexts is not None:
            with tf.compat.v1.variable_scope('encode_context'):
                context_feats = slim.fully_connected(contexts,
                                                     dim_fc_context,
                                                     scope='fc')
                context_feats = tf.tile(tf.expand_dims(context_feats, 1),
                                        [1, num_bodies, 1])
                features.append(context_feats)

        net = tf.concat(features, axis=-1)
        net = slim.repeat(net,
                          2,
                          slim.fully_connected,
                          dim_fc_state,
                          scope='fc')
        effects = tf.identity(net, 'effects')

    return effects
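
A hedged sketch of calling encode_effect. The shapes are assumptions read off
from how the function indexes its inputs, and the module-level
NORMALIZER_FN/NORMALIZER_PARAMS globals and the encode_relation helper must
already be defined; use_relation=False keeps the sketch independent of the
latter.

import tensorflow.compat.v1 as tf

states = {
    'position': tf.placeholder(tf.float32, [32, 4, 3]),   # 4 bodies, 3-D
    'body_mask': tf.placeholder(tf.float32, [32, 4]),
}
contexts = tf.placeholder(tf.float32, [32, 16])
effects = encode_effect(states, contexts,
                        use_relation=False, use_point_cloud=False,
                        dim_fc_state=128, dim_fc_context=64)
# effects: [32, 4, 128] -- one encoding per body.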
Example #21
def vgg_19(inputs,
           y,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           reuse=None,
           scope='vgg_19',
           fc_conv_padding='VALID',
           global_pool=False):
  """Oxford Net VGG 19-Layers version E Example.
  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.
  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    y: one-hot label tensor of size [batch_size, num_classes].
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer are returned instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.
    global_pool: Optional boolean flag. If True, the input to the classification
      layer is avgpooled to size 1x1, for any input size. (This is not part
      of the original VGG architecture.)
  Returns:
    loss: the mean softmax cross-entropy between `y` and the logits.
    outputs: a list of intermediate activation tensors, one per layer.
    scopes: a list of scope names aligned with `outputs`.
  """
  scopes = []
  outputs = []
  # Use a proper variable scope instead of mutating the private _reuse flag.
  with tf.variable_scope(scope, 'vgg_19', [inputs],
                         reuse=reuse or tf.AUTO_REUSE) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      scopes.append('conv1')
      outputs.append(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      scopes.append('pool1')
      outputs.append(net)

      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      scopes.append('conv2')
      outputs.append(net)

      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      scopes.append('pool2')
      outputs.append(net)

      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
      scopes.append('conv3')
      outputs.append(net)

      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      scopes.append('pool3')
      outputs.append(net)

      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
      scopes.append('conv4')
      outputs.append(net)

      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      scopes.append('pool4')
      outputs.append(net)

      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
      scopes.append('conv5')
      outputs.append(net)

      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      scopes.append('pool5')
      outputs.append(net)


      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
      scopes.append('fc6')
      outputs.append(net)

      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      scopes.append('dropout6')
      outputs.append(net)

      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      scopes.append('fc7')
      outputs.append(net)
      net = slim.conv2d(net, 4096, [1, 1], scope='fc8')
      scopes.append('fc8')
      outputs.append(net)
      net = slim.conv2d(net, 4096, [1, 1], scope='fc9')
      scopes.append('fc9')
      outputs.append(net)
      net = slim.conv2d(net, 4096, [1, 1], scope='fc10')
      scopes.append('fc10')
      outputs.append(net)

      if num_classes:
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout10')
        scopes.append('dropout10')
        outputs.append(net)

        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc11')

      with tf.variable_scope("fc11"):
        net = tf.squeeze(net, [1, 2], name="squzzezd")
        _, indexs = tf.math.top_k(net,5)

        def fn(args):
          y,index = args
          return tf.gather(y,index)
        acc_array = tf.vectorized_map(fn,(y,indexs))

        top_accuracy = tf.reduce_sum(acc_array,name="top_accuracy")

        loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=net)
        loss = tf.reduce_mean(loss)
      scopes.append('fc11')
      outputs.append(loss)
      return loss, outputs,scopes
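
A hedged call sketch for this loss-returning vgg_19 variant. Shapes are
assumptions; y must be one-hot (or a probability distribution) over classes
so that gathering it at the top-5 indices counts label hits.

import tensorflow.compat.v1 as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.float32, [None, 1000])  # one-hot
loss, outputs, scopes = vgg_19(images, labels, num_classes=1000)
train_op = tf.train.MomentumOptimizer(1e-2, 0.9).minimize(loss)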
Example #22
def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           fc_conv_padding='VALID',
           global_pool=False):
    """
        VGG19模型
        :param inputs:a tensor [batch_size, height, width, channels]
        :param num_classes:分类数
        :param is_training: 是否训练
        :param dropout_keep_prob: 训练时dropout保持激活的可能性
        :param spatial_squeeze:是否压缩输出的空间维度
        :param scope:变量的可选范围
        :param fc_conv_padding: 全连接层的填充类型 'SAME' or 'VALID'
        :param global_pool: a boolean flag .True: 则对分类模块的输入需用平均池化
        :return: net: VGG net
                 end_points :a dict of tensors with intermediate activations.
        """
    with tf.compat.v1.variable_scope(scope, 'vgg_19', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net,
                              4096, [7, 7],
                              padding=fc_conv_padding,
                              scope='fc6')
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if global_pool:
                net = tf.reduce_mean(net, [1, 2],
                                     keepdims=True,
                                     name='global_pool')
                end_points['global_pool'] = net
            if num_classes:
                net = slim.dropout(net,
                                   dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout7')
                net = slim.conv2d(net,
                                  num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                if spatial_squeeze:
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
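
A quick shape check of the fc_conv_padding behavior described in the vgg_19
docstrings above: with 'VALID' padding, the 7x7 fc6 shrinks a fully
convolutional prediction map to (input / 32) - 6. The 512x512 input below is
an assumed example for the vgg_19 defined directly above.

import tensorflow.compat.v1 as tf

images = tf.placeholder(tf.float32, [1, 512, 512, 3])
net, _ = vgg_19(images, num_classes=1000, spatial_squeeze=False,
                fc_conv_padding='VALID')
# pool5: 512 / 32 = 16; fc6 with 7x7 VALID: 16 - 7 + 1 = 10 = 16 - 6.
print(net.shape)  # (1, 10, 10, 1000)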