def testEndPointsV2(self):
    """Check the end-point names produced by a tiny v2 bottleneck network.

    Two blocks of two units each are built through ``self._resnet_plain``
    under the scope ``'tiny'``; the returned end points must match the
    expected names exactly (the comparison ignores ordering).
    """
    # (block name, base depth, stride); every block has two units.
    block_specs = (
        ('block1', 1, 2),
        ('block2', 2, 1),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=2, stride=stride)
        for name, depth, stride in block_specs
    ]

    inputs = create_test_input(2, 32, 16, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')

    # Only the first unit of each block is expected to expose a shortcut.
    expected = [
        'tiny/block1/unit_1/bottleneck_v2/shortcut',
        'tiny/block1/unit_1/bottleneck_v2/conv1',
        'tiny/block1/unit_1/bottleneck_v2/conv2',
        'tiny/block1/unit_1/bottleneck_v2/conv3',
        'tiny/block1/unit_2/bottleneck_v2/conv1',
        'tiny/block1/unit_2/bottleneck_v2/conv2',
        'tiny/block1/unit_2/bottleneck_v2/conv3',
        'tiny/block2/unit_1/bottleneck_v2/shortcut',
        'tiny/block2/unit_1/bottleneck_v2/conv1',
        'tiny/block2/unit_1/bottleneck_v2/conv2',
        'tiny/block2/unit_1/bottleneck_v2/conv3',
        'tiny/block2/unit_2/bottleneck_v2/conv1',
        'tiny/block2/unit_2/bottleneck_v2/conv2',
        'tiny/block2/unit_2/bottleneck_v2/conv3',
    ]
    self.assertItemsEqual(expected, end_points)
def testEndPointsV2(self):
    """Verify which end points a two-block v2 bottleneck network reports.

    The network is assembled with ``self._resnet_plain`` inside the
    ``'tiny'`` scope, and its end-point collection is compared against the
    full list of expected names without regard to order.
    """
    first_block = resnet_v2.resnet_v2_block(
        'block1', base_depth=1, num_units=2, stride=2)
    second_block = resnet_v2.resnet_v2_block(
        'block2', base_depth=2, num_units=2, stride=1)
    blocks = [first_block, second_block]

    inputs = create_test_input(2, 32, 16, 3)
    default_scope = resnet_utils.resnet_arg_scope()
    with arg_scope(default_scope):
        _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')

    expected = [
        # block1: the shortcut appears on the first unit only.
        'tiny/block1/unit_1/bottleneck_v2/shortcut',
        'tiny/block1/unit_1/bottleneck_v2/conv1',
        'tiny/block1/unit_1/bottleneck_v2/conv2',
        'tiny/block1/unit_1/bottleneck_v2/conv3',
        'tiny/block1/unit_2/bottleneck_v2/conv1',
        'tiny/block1/unit_2/bottleneck_v2/conv2',
        'tiny/block1/unit_2/bottleneck_v2/conv3',
        # block2: same pattern.
        'tiny/block2/unit_1/bottleneck_v2/shortcut',
        'tiny/block2/unit_1/bottleneck_v2/conv1',
        'tiny/block2/unit_1/bottleneck_v2/conv2',
        'tiny/block2/unit_1/bottleneck_v2/conv3',
        'tiny/block2/unit_2/bottleneck_v2/conv1',
        'tiny/block2/unit_2/bottleneck_v2/conv2',
        'tiny/block2/unit_2/bottleneck_v2/conv3',
    ]
    self.assertItemsEqual(expected, end_points)
def __init__(self, x1, y_, num_classes, is_training, global_pool,
             output_stride, reuse, scope):
    """Build the network through ``self.resnet_v2_spkid`` and store its outputs.

    Args:
      x1: Raw input; it is passed through ``self.cmn`` before entering the
        network.
      y_: Labels handed to ``self.resnet_v2_spkid`` and kept as
        ``self.label``.
      num_classes: Forwarded unchanged to ``self.resnet_v2_spkid``.
      is_training: Forwarded unchanged to ``self.resnet_v2_spkid``.
      global_pool: Forwarded unchanged to ``self.resnet_v2_spkid``.
      output_stride: Forwarded unchanged to ``self.resnet_v2_spkid``.
      reuse: Forwarded unchanged to ``self.resnet_v2_spkid``.
      scope: Forwarded unchanged to ``self.resnet_v2_spkid``.
    """
    # Resnet structure: (name, base_depth, num_units, stride) per stage.
    # NOTE(review): strides are two-element lists, so the resnet_v2_block in
    # use presumably accepts per-axis strides -- confirm against its
    # definition.
    stage_specs = (
        ('block1', 16, 3, [1, 1]),
        ('block2', 32, 4, [1, 2]),
        ('block3', 64, 6, [1, 2]),
        ('block4', 128, 3, [1, 2]),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]

    normalized_input = self.cmn(x1)
    self.loss, self.end_points = self.resnet_v2_spkid(
        normalized_input,
        y_,
        blocks,
        num_classes=num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        reuse=reuse,
        scope=scope)
    self.label = y_
def resnet101(inputs,
              num_classes=None,
              is_training=True,
              global_pool=False,
              output_stride=None,
              spatial_squeeze=False,
              reuse=tf.AUTO_REUSE,
              scope='resnet_v2_101'):
    """Build a ResNet-101-style v2 network made of three stages.

    Only ``block1``..``block3`` (3, 4 and 23 units) are stacked here; no
    ``block4`` is defined. See ``resnet_v2.resnet_v2`` for the full
    description of the arguments and the return value.

    Args:
      inputs: Forwarded to ``resnet_v2.resnet_v2``.
      num_classes: Forwarded to ``resnet_v2.resnet_v2``.
      is_training: Forwarded to ``resnet_v2.resnet_v2``.
      global_pool: Forwarded to ``resnet_v2.resnet_v2``.
      output_stride: Forwarded to ``resnet_v2.resnet_v2``.
      spatial_squeeze: Currently unused; it is not forwarded to
        ``resnet_v2.resnet_v2``.
      reuse: Forwarded to ``resnet_v2.resnet_v2``.
      scope: Forwarded to ``resnet_v2.resnet_v2``.

    Returns:
      Whatever ``resnet_v2.resnet_v2`` returns for these blocks.
    """
    # (name, base_depth, num_units); every stage uses stride 2.
    stage_specs = (
        ('block1', 64, 3),
        ('block2', 128, 4),
        ('block3', 256, 23),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=units, stride=2)
        for name, depth, units in stage_specs
    ]
    network = resnet_v2.resnet_v2(
        inputs,
        blocks,
        num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        include_root_block=True,
        reuse=reuse,
        scope=scope)
    return network
def resnet_v2_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 reuse=None,
                 scope='resnet_v2_50'):
    """Assemble the four-stage ResNet-50 v2 model (3/4/6/3 units).

    The first stage uses stride 1 and the remaining three use stride 2; the
    root block is always included. See ``resnet_v2.resnet_v2`` for the
    description of the arguments and the return value.
    """
    # (name, base_depth, num_units, stride) per stage.
    stage_specs = (
        ('block1', 64, 3, 1),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 2),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]
    model = resnet_v2.resnet_v2(
        inputs,
        blocks,
        num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        include_root_block=True,
        reuse=reuse,
        scope=scope)
    return model
def resnet_v2_50(inputs, is_training=True):
    """Run a hand-assembled ResNet-50 v2 trunk and return pooled features.

    The graph is built under the variable scope ``'resnet_v2_50'``: a 7x7
    stride-2 root convolution, a 3x3 stride-2 max pool, four bottleneck
    stages, a final batch norm + ReLU, and a mean over axes 1 and 2.

    Args:
      inputs: Input tensor fed to the root convolution.
      is_training: Passed to every ``slim.batch_norm`` in the network.

    Returns:
      The tensor after the ``'pool5'`` mean over axes [1, 2], with the
      reduced axes kept (``keepdims=True``).
    """
    # (name, base_depth, num_units, stride) per stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()), \
         tf.variable_scope('resnet_v2_50', 'resnet_v2', [inputs]), \
         slim.arg_scope([slim.conv2d, resnet_v2.bottleneck,
                         resnet_utils.stack_blocks_dense]), \
         slim.arg_scope([slim.batch_norm], is_training=is_training):
        # The root convolution carries neither activation nor normalizer.
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None, normalizer_fn=None):
            net = resnet_utils.conv2d_same(
                inputs, 64, 7, stride=2, scope='conv1')
        net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks)
        # This is needed because the pre-activation variant does not have
        # batch normalization or activation functions in the residual unit
        # output. See Appendix of [2].
        net = slim.batch_norm(net, activation_fn=nn_ops.relu,
                              scope='postnorm')
        net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
    return net
def construct_embedding(self):
    """Builds an embedding function on top of images.

    Method to be overridden by implementations.

    Adaptation blocks described by ``self._config.adaptation_blocks`` are
    stacked on ``self._pretrained_output``, the result is averaged over axes
    1 and 2, and it is then projected to ``self._embedding_size`` either
    through fully connected layers (``emb_connection == 'fc'``) or through a
    1x1 convolution. The embedding is L2-normalized when
    ``self._embedding_l2`` is set.

    Returns:
      embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
        holding the embedded images.
    """
    with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
        self._adaptation_scope = vs.name
        net = self._pretrained_output

        # Define some adaptation blocks on top of the pre-trained resnet
        # output. The config string is a '-'-separated list of
        # DEPTH_NUMUNITS pairs.
        adaptation_block_params = [
            [int(value) for value in spec.split('_')]
            for spec in self._config.adaptation_blocks.split('-')
        ]
        adaptation_blocks = [
            resnet_v2.resnet_v2_block(
                'adaptation_block_%d' % i,
                base_depth=depth,
                num_units=num_units,
                stride=1)
            for i, (depth, num_units) in enumerate(adaptation_block_params)
        ]

        # Stack them on top of the resnet output.
        net = resnet_utils.stack_blocks_dense(
            net, adaptation_blocks, output_stride=None)

        # Average pool the output.
        net = tf.reduce_mean(
            net, [1, 2], name='adaptation_pool', keep_dims=True)

        if self._config.emb_connection == 'fc':
            # Use fully connected layer to project to embedding layer.
            fc_hidden_sizes = self._config.fc_hidden_sizes
            if fc_hidden_sizes == 'None':
                fc_hidden_sizes = []
            else:
                fc_hidden_sizes = [
                    int(size) for size in fc_hidden_sizes.split('_')]
            fc_hidden_keep_prob = self._config.dropout.keep_fc
            # Drop only the two pooled axes. A bare tf.squeeze would also
            # remove the batch axis when the batch holds a single example.
            net = tf.squeeze(net, [1, 2])
            for fc_hidden_size in fc_hidden_sizes:
                net = slim.layers.fully_connected(net, fc_hidden_size)
                if fc_hidden_keep_prob < 1.0:
                    net = slim.dropout(
                        net,
                        keep_prob=fc_hidden_keep_prob,
                        is_training=self._is_training)

            # Connect last FC layer to embedding.
            embedding = slim.layers.fully_connected(
                net, self._embedding_size, activation_fn=None)
        else:
            # Use 1x1 conv layer to project to embedding layer.
            embedding = slim.conv2d(
                net,
                self._embedding_size, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='embedding')
            # Keep the batch and channel axes even when either has size 1,
            # so the result stays [batch_size, embedding_size].
            embedding = tf.squeeze(embedding, [1, 2])

    # Optionally L2 normalize the embedding.
    if self._embedding_l2:
        embedding = tf.nn.l2_normalize(embedding, dim=1)

    return embedding
def feature_extractor_resnet(images,
                             dim=256,
                             weight_decay=0.0001,
                             batch_norm_decay=0.999,
                             batch_renorm_decay=0.99,
                             batch_renorm_rmax=3.,
                             batch_renorm_dmax=5.,
                             is_training=True,
                             use_conv3d=True):
    """Extract a feature map with a small ResNet v2 plus a conv head.

    Args:
      images: Input tensor. With ``use_conv3d`` it is treated as
        [N, T, H, W, C] and flattened to [N*T, H, W, C] for the ResNet;
        otherwise it is fed to the ResNet as given (presumably
        [N, H, W, C] -- confirm against callers).
      dim: Number of output channels of the final projection layer.
      weight_decay: Passed to ``resnet_v2.resnet_arg_scope``.
      batch_norm_decay: Passed to ``resnet_v2.resnet_arg_scope``.
      batch_renorm_decay: ``renorm_decay`` for batch renormalization.
      batch_renorm_rmax: Upper clip ``rmax``; ``rmin`` is set to its
        reciprocal.
      batch_renorm_dmax: Clip value ``dmax`` for batch renormalization.
      is_training: Passed to the ResNet and to the head's batch norm.
      use_conv3d: If true, apply a 3-D convolutional head on the reshaped
        [N, T, H', W', C'] features; otherwise apply a single 1x1 2-D
        convolution.

    Returns:
      The output of the last convolution, which has no activation function
      and no normalizer.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    if use_conv3d:
        orig_shape = tf.shape(images)
        # [N,T,H,W,C] -> [N*T,H,W,C]
        images = tf.reshape(images, tf.concat([[-1], orig_shape[2:]], 0))

    resnet_arg_scope = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_decay=batch_norm_decay)

    # batch size is small so we use batch renormalization
    # A list comprehension is used instead of filter(...)[0]: on Python 3
    # filter returns an iterator, which cannot be indexed.
    batch_norm_key = [
        key for key in resnet_arg_scope if 'batch_norm' in key][0]
    resnet_arg_scope[batch_norm_key].update({
        'renorm': True,
        'renorm_decay': batch_renorm_decay,
        'renorm_clipping': {
            'rmin': 1. / batch_renorm_rmax,
            'rmax': batch_renorm_rmax,
            'dmax': batch_renorm_dmax,
        },
    })

    with slim.arg_scope(resnet_arg_scope):
        blocks = [
            resnet_v2.resnet_v2_block(
                'block1', base_depth=16, num_units=3, stride=2),
            resnet_v2.resnet_v2_block(
                'block2', base_depth=32, num_units=4, stride=2),
            resnet_v2.resnet_v2_block(
                'block3', base_depth=64, num_units=6, stride=2),  # 256
            resnet_v2.resnet_v2_block(
                'block4', base_depth=128, num_units=3, stride=1),  # 512
        ]
        _, end_points = resnet_v2.resnet_v2(
            images, blocks, is_training=is_training,
            include_root_block=False)
    net = end_points['resnet_v2/block4']

    if use_conv3d:
        # [N*T,H',W',C'] -> [N,T,H',W',C']
        net = tf.reshape(
            net, tf.concat([orig_shape[:2], tf.shape(net)[1:]], 0))

    arg_scope = convert_resnet_arg_scope_to_slim(resnet_arg_scope)
    arg_scope[slim.conv2d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.conv3d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.batch_norm]['is_training'] = is_training

    with slim.arg_scope(arg_scope):
        if use_conv3d:
            net = slim.conv3d(net, 512, [3, 3, 3])
            net = slim.conv3d(net, 256, [1, 1, 1])
            net = slim.conv3d(net, 512, [3, 3, 3])
            # the last layer without activation function
            feature_map = slim.conv3d(
                net, dim, [1, 1, 1],
                activation_fn=None, normalizer_fn=None)
        else:
            # the last layer without activation function
            feature_map = slim.conv2d(
                net, dim, [1, 1],
                activation_fn=None, normalizer_fn=None)

    return feature_map
def _build_network(self, sess, is_training=True):
    """Build the ResNet-v2 detection graph and return its main outputs.

    The base and the first ``cfg.RESNET.FIXED_BLOCKS`` blocks are built with
    ``is_training=False``; the remaining trunk blocks and the final block
    (applied to the pooled regions) use ``is_training``.

    Args:
      sess: Not used by this method.
      is_training: Training flag for the non-fixed parts of the network.

    Returns:
      A tuple ``(rois, cls_prob, poly_pred)``.

    Raises:
      NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152, or
        if ``cfg.POOLING_MODE`` is not ``'crop'``.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different number of layers
    units_per_depth = {
        50: (3, 4, 6, 3),
        101: (3, 4, 23, 3),
        152: (3, 8, 36, 3),
    }
    if self._num_layers not in units_per_depth:
        # other numbers are not supported
        raise NotImplementedError
    block_names = ('block1', 'block2', 'block3', 'block4')
    base_depths = (64, 128, 256, 512)
    # use stride 1 for the last conv4 layer
    strides = (2, 2, 1, 1)
    blocks = [
        resnet_v2_block(name, base_depth=depth, num_units=units,
                        stride=stride)
        for name, depth, units, stride in zip(
            block_names, base_depths, units_per_depth[self._num_layers],
            strides)
    ]

    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    num_fixed = cfg.RESNET.FIXED_BLOCKS

    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()
    if num_fixed > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, _ = resnet_v2.resnet_v2(
                net_conv,
                blocks[0:num_fixed],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
    if num_fixed < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, _ = resnet_v2.resnet_v2(
                net_conv,
                blocks[num_fixed:-1],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v2.resnet_v2(
            pool5,
            blocks[-1:],
            global_pool=False,
            include_root_block=False,
            scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        # region classification
        cls_prob, poly_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)
    return rois, cls_prob, poly_pred