def __init__(self):
    """Build graph inputs, constants and block definitions for a
    semi-supervised model over 28x28x1 (MNIST-sized) images.

    Only placeholders and ResNet block specs are created here; the
    encoder/decoder networks are presumably assembled elsewhere -- TODO confirm.
    """
    self._num_classes = cfg.NUM_CLASSES
    self._batch_size = cfg.TRAIN.BATCH_SIZE
    self._latent_size = 128
    self._hidden_size = 256
    # 0.1 * (55000 examples / 100 per batch); presumably scales the
    # supervised loss term -- TODO confirm against the loss definition.
    self._supervised_scaling_const = 0.1 * (55000 / 100)
    # Labeled and unlabeled image batches share the same shape.
    self._x_labeled = tf.placeholder(tf.float32, shape=[self._batch_size, 28, 28, 1])
    self._x_unlabeled = tf.placeholder(tf.float32, shape=[self._batch_size, 28, 28, 1])
    # Concatenate along the batch axis: labeled first, then unlabeled.
    self._x = tf.concat([self._x_labeled, self._x_unlabeled], 0)
    self._y_labeled = tf.placeholder(tf.float32, shape=[self._batch_size, self._num_classes])
    self._y_all, self.y_unlabeled = self.generate_y(self._y_labeled)
    self._losses = {}
    self._initializer = self.define_initializer()
    # Encoder reuses the bottleneck unit; the two decoder variants differ
    # in their transpose-conv padding (valid vs same).
    self._blocks_encoder = [resnet_utils.Block('block4', bottleneck, [(256, 128, 1)] * 3)]
    self._blocks_decoder_valid = [resnet_utils.Block('block5', bottleneck_trans_valid,
                                                     [(256, 128, 1), (256, 128, 2)])]
    self._blocks_decoder_same = [resnet_utils.Block('block5', bottleneck_trans_same,
                                                    [(256, 128, 2), (256, 128, 2)])]
    self._resnet_scope = 'resnet_v1_%d' % 101
    # Tile the unlabeled batch once per class so every example can be
    # paired with every candidate label.
    # (100, 256) --> (2100, 256)  NOTE(review): stale shape comment -- the
    # actual tensors here are (batch, 28, 28, 1); verify intent.
    x_unlabeled_tiled = tf.tile(self._x_unlabeled, [self._num_classes, 1, 1, 1])
    self.outputs = {'labeled': {'x_in': self._x_labeled},
                    'unlabeled': {'x_in': x_unlabeled_tiled}}
def resnet_v2_200(inputs, num_classes=None, global_pool=True, output_stride=None, reuse=None, scope='resnet_v2_200'):
    """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
    # Each unit is (output depth, bottleneck depth, stride); the stride-2
    # unit closes each of the first three stages.
    stage1 = resnet_utils.Block('block1', bottleneck, 2 * [(256, 64, 1)] + [(256, 64, 2)])
    stage2 = resnet_utils.Block('block2', bottleneck, 23 * [(512, 128, 1)] + [(512, 128, 2)])
    stage3 = resnet_utils.Block('block3', bottleneck, 35 * [(1024, 256, 1)] + [(1024, 256, 2)])
    stage4 = resnet_utils.Block('block4', bottleneck, 3 * [(2048, 512, 1)])
    return resnet_v2(
        inputs, [stage1, stage2, stage3, stage4], num_classes, global_pool,
        output_stride, include_root_block=True, reuse=reuse, scope=scope)
def testEndPointsV1(self):
    """Test the end points of a tiny v1 bottleneck network."""
    bottleneck = resnet_v1.bottleneck
    # Two tiny stages, two bottleneck units each; block1 downsamples at
    # its second unit, block2 keeps stride 1 throughout.
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])
    ]
    inputs = create_test_input(2, 32, 16, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
    # The first unit of each block changes depth, so it gets a projection
    # shortcut (with BatchNorm); later units reuse the identity shortcut.
    expected = [
        'tiny/block1/unit_1/bottleneck_v1/shortcut',
        'tiny/block1/unit_1/bottleneck_v1/shortcut/BatchNorm',
        'tiny/block1/unit_1/bottleneck_v1/conv1',
        'tiny/block1/unit_1/bottleneck_v1/conv2',
        'tiny/block1/unit_1/bottleneck_v1/conv3',
        'tiny/block1/unit_1/bottleneck_v1/conv3/BatchNorm',
        'tiny/block1/unit_2/bottleneck_v1/conv1',
        'tiny/block1/unit_2/bottleneck_v1/conv2',
        'tiny/block1/unit_2/bottleneck_v1/conv3',
        'tiny/block1/unit_2/bottleneck_v1/conv3/BatchNorm',
        'tiny/block2/unit_1/bottleneck_v1/shortcut',
        'tiny/block2/unit_1/bottleneck_v1/shortcut/BatchNorm',
        'tiny/block2/unit_1/bottleneck_v1/conv1',
        'tiny/block2/unit_1/bottleneck_v1/conv2',
        'tiny/block2/unit_1/bottleneck_v1/conv3',
        'tiny/block2/unit_1/bottleneck_v1/conv3/BatchNorm',
        'tiny/block2/unit_2/bottleneck_v1/conv1',
        'tiny/block2/unit_2/bottleneck_v1/conv2',
        'tiny/block2/unit_2/bottleneck_v1/conv3',
        'tiny/block2/unit_2/bottleneck_v1/conv3/BatchNorm'
    ]
    self.assertItemsEqual(expected, end_points)
def __init__(self, is_training):
    """Placeholders and configuration for an HOI model combining a
    ResNet-50 image stream with a GCN over a 26-verb embedding graph.

    Args:
      is_training: Python bool; stored and presumably used to toggle
        batch-norm / dropout behaviour when the graph is built.
    """
    self.name = self.__class__.__name__.lower()
    self.vars = {}
    self.layers = []
    self.activations = []
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.losses = {}
    self.score_summaries = {}
    self.event_summaries = {}
    # Single image per batch, arbitrary H x W, 3 channels.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    # ROIs as (batch_idx, x1, y1, x2, y2).
    self.H_boxes = tf.placeholder(tf.float32, shape=[None , 5], name = 'H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None , 5], name = 'O_boxes')
    # 4-d encoded box features -- presumably normalized coordinates; TODO confirm.
    self.H_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'H_boxes_enc')
    self.O_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'O_boxes_enc')
    self.HO_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'HO_boxes_enc')
    # 26-way multi-label targets and their loss masks.
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_HO')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 26], name = 'H_mask')
    self.H_num = tf.placeholder(tf.int32) # pos
    self.ivs = tf.placeholder(tf.int32, shape=[26], name = 'idx_GT_verbs')
    # 300-d word embeddings for the graph nodes.
    self.inputs = tf.placeholder(tf.float32, shape=[None, 300], name = 'embedding')
    # Single sparse adjacency support matrix for the GCN.
    self.support = [tf.sparse_placeholder(tf.float32) for _ in range(1)]
    self.num_nonzero = tf.placeholder(tf.int32)
    self.in_dim = 300
    self.hidden_dim = 512
    self.out_dim = 512
    self.num_classes = 26
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.train = is_training
    self.now_lr = None
    self.optimizer = None
    self.opt_op = None
    # ResNet-50 block specs. TF 1.1.0 lacks resnet_v1_block, so the units
    # are spelled out as (depth, depth_bottleneck, stride) tuples there.
    # block5 duplicates block4's shape (an extra stride-1 head stage).
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
    self.build_all()
def __init__(self):
    """Graph inputs and constants for a 29-class HOI network with an
    additional binary (interaction / no-interaction) head."""
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Single image per batch, arbitrary H x W, 3 channels.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp') # Pattern.reshape( num_pos_neg, 64, 64, 3)
    # ROIs as (batch_idx, x1, y1, x2, y2).
    self.Hsp_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes') #Object_augmented[:num_pos].reshape(num_pos, 5)
    # 29-way multi-label targets and their loss masks (H / HO / spatial streams).
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
    self.H_num = tf.placeholder(tf.int32)
    # Precomputed per-class loss weights -- presumably derived from the
    # training label distribution; TODO confirm their derivation.
    self.HO_weight = np.array([3.3510249, 3.4552405, 4.0257854, 0.0,
                               4.088436, 3.4370995, 3.85842, 4.637334,
                               3.5487218, 3.536237, 2.5578923, 3.342811,
                               3.8897269, 4.70686, 3.3952892, 3.9706533,
                               4.504736, 0.0, 1.4873443, 3.700363,
                               4.1058283, 3.6298118, 0.0, 6.490651,
                               5.0808263, 1.520838, 3.3888445, 0.0,
                               3.9899964], dtype = 'float32').reshape(1,29)
    self.H_weight = np.array([4.0984106, 4.102459, 4.0414762, 4.060745,
                              4.0414762, 3.9768186, 4.23686, 5.3542085,
                              3.723717, 3.4699364, 2.4587274, 3.7167964,
                              4.08836, 5.050695, 3.9077065, 4.534647,
                              3.4699364, 2.9466882, 1.8585607, 3.9433942,
                              3.9433942, 4.3523254, 3.8368235, 6.4963055,
                              5.138182, 1.7807873, 4.080392, 1.9544303,
                              4.5761204], dtype = 'float32').reshape(1,29)
    self.binary_weight = np.array([1.0986122886681098, 0.4054651081081644], dtype = 'float32').reshape(1,2)
    self.num_classes = 29
    self.num_binary = 2 # existence (0 or 1) of HOI
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)
    # ResNet-50 block specs; TF 1.1.0 lacks resnet_v1_block, so the units
    # are spelled out as (depth, depth_bottleneck, stride) tuples there.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def resnet_v1_block(scope, base_depth, num_units, stride, rate=1):
    """Helper function for creating a resnet_v1 bottleneck block.

    downsampling(stride=2) is done in each stage's first block!

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the FIRST
        unit (unlike the stock slim helper, which strides the last unit).
        All other units have stride=1.
      rate: Atrous (dilation) rate applied to the first unit only; all
        remaining units use rate=1.

    Returns:
      A resnet_v1 bottleneck block.
    """
    return resnet_utils.Block(scope, bottleneck, [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride,
        'rate': rate,
    }] + [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1,
        'rate': 1
    }] * (num_units - 1))
def _atrousValues(self, bottleneck):
    """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed
    by subsampling gives identical results to feature extraction at the
    nominal network output stride using the simple
    self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
    ]
    # Three stride-2 units above => nominal output stride of 8.
    nominal_stride = 8
    # Test both odd and even input dimensions.
    height = 30
    width = 31
    with arg_scope(resnet_utils.resnet_arg_scope(is_training=False)):
        for output_stride in [1, 2, 4, 8, None]:
            with ops.Graph().as_default():
                with self.test_session() as sess:
                    # Fixed seed so both extraction paths see the same
                    # initial weights (combined with reuse below).
                    random_seed.set_random_seed(0)
                    inputs = create_test_input(1, height, width, 3)
                    # Dense feature extraction followed by subsampling.
                    output = resnet_utils.stack_blocks_dense(
                        inputs, blocks, output_stride)
                    if output_stride is None:
                        factor = 1
                    else:
                        factor = nominal_stride // output_stride
                    output = resnet_utils.subsample(output, factor)
                    # Make the two networks use the same weights.
                    variable_scope.get_variable_scope().reuse_variables()
                    # Feature extraction at the nominal network rate.
                    expected = self._stack_blocks_nondense(inputs, blocks)
                    sess.run(variables.global_variables_initializer())
                    output, expected = sess.run([output, expected])
                    self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
def resnet_v1_block(scope, base_depth, num_units, stride):
    """Create a resnet_v1 block of plain (non-bottleneck) units.

    The first unit applies `stride`; the remaining `num_units - 1` units
    keep stride 1 at the same depth.
    """
    trailing = (num_units - 1) * [{'depth': base_depth, 'stride': 1}]
    leading = [{'depth': base_depth, 'stride': stride}]
    return resnet_utils.Block(scope, not_bottleneck, leading + trailing)
def resnet50V2_reduced(inputs, is_training=True, output_stride=None, include_root_block=True, reuse=None, scope=None):
    """ResNet-50 v2 trunk truncated after block3 (no block4, no logits).

    Args:
      inputs: image tensor fed to the network.
      is_training: controls batch-norm mode.
      output_stride: optional target output stride for atrous extraction;
        must be a multiple of 4 when the root block is included.
      include_root_block: whether to apply the initial 7x7 conv + max-pool.
      reuse: variable reuse flag for the enclosing variable scope.
      scope: optional variable-scope name (defaults to 'resnet_v2_50').

    Returns:
      The block3 feature map after the final 'postnorm' batch norm + ReLU.
    """
    # These are the blocks for resnet 50
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5)
    ]
    # Initialize Model
    with tf.variable_scope(scope, 'resnet_v2_50', [inputs], reuse=reuse):
        # NOTE(review): arg_scope is entered without keyword overrides here,
        # unlike the stock slim resnet which sets outputs_collections -- confirm
        # this is intentional.
        with slim.arg_scope(
                [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense]):
            with slim.arg_scope([slim.batch_norm], is_training=is_training) as scope:
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # Root block already strides by 4, so the remaining
                        # blocks only need output_stride / 4.
                        # NOTE(review): '/' yields a float under Python 3 --
                        # confirm '//' was intended if running on py3.
                        output_stride /= 4
                    # Root conv has no activation/normalization of its own.
                    with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        # v2 resnets normalize + activate once more after the last block.
        with slim.arg_scope([slim.batch_norm], is_training=is_training) as scope:
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
        return net
def resnet_v1_block(scope, bottleneck, base_depth, num_units, stride):
    """Create a resnet_v1 bottleneck block.

    Every unit outputs depth `4 * base_depth`. The first `num_units - 1`
    units use stride 1; the final unit applies `stride`.
    """
    regular_unit = {
        'depth': 4 * base_depth,
        'depth_bottleneck': base_depth,
        'stride': 1
    }
    final_unit = {
        'depth': 4 * base_depth,
        'depth_bottleneck': base_depth,
        'stride': stride
    }
    return resnet_utils.Block(scope, bottleneck,
                              (num_units - 1) * [regular_unit] + [final_unit])
def _resnet_small(self, inputs, num_classes=None, global_pool=True, output_stride=None, include_root_block=True, reuse=None, scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests."""
    bottleneck = resnet_v1.bottleneck

    def _stage(name, depth, depth_bneck, final_stride):
        # Two stride-1 units, optionally followed by one strided unit.
        units = [(depth, depth_bneck, 1)] * 2
        if final_stride is not None:
            units = units + [(depth, depth_bneck, final_stride)]
        return resnet_utils.Block(name, bottleneck, units)

    blocks = [
        _stage('block1', 4, 1, 2),
        _stage('block2', 8, 2, 2),
        _stage('block3', 16, 4, 2),
        _stage('block4', 32, 8, None),
    ]
    return resnet_v1.resnet_v1(inputs, blocks, num_classes, global_pool,
                               output_stride, include_root_block, reuse, scope)
def build_network(self):
    """Build the ResNet-50 trunk up to (and including) block3.

    Respects cfg.RESNET.FIXED_BLOCKS: the first FIXED_BLOCKS stages are
    built with trainable=False (frozen), the rest are trainable. block4 is
    always excluded here (blocks[...:-1]).

    Returns:
      net_conv4: the block3 feature map.
    """
    # select initializers
    bottleneck = resnet_v1.bottleneck
    # ResNet-50 stages; block3's last unit keeps stride 1 so the feature
    # map stays at stride 16 for ROI pooling.
    blocks = [resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
              resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
              resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
              resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        # All used stages frozen.
        with slim.arg_scope(resnet_arg_scope(trainable=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        # First FIXED_BLOCKS stages frozen, remaining stages trainable.
        with slim.arg_scope(resnet_arg_scope(trainable=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope()):
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope()):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    return net_conv4
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block."""
    def _unit(unit_stride):
        # One bottleneck unit: output depth is 4x the bottleneck depth.
        return {
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': unit_stride
        }
    units = [_unit(1) for _ in range(num_units - 1)] + [_unit(stride)]
    return resnet_utils.Block(scope, bottleneck, units)
def __init__(self):
    """Graph inputs and constants for a 29-class HOI network with a
    two-channel 64x64 spatial interaction-pattern stream."""
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Single image per batch, arbitrary H x W, 3 channels.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    # 64x64x2 interaction pattern per human-object pair.
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name = 'sp')
    # ROIs as (batch_idx, x1, y1, x2, y2).
    self.Hsp_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
    # 29-way multi-label targets and their loss masks (H / HO / spatial streams).
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
    self.H_num = tf.placeholder(tf.int32)
    self.num_classes = 29
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)
    # ResNet-50 block specs; TF 1.1.0 lacks resnet_v1_block, so the units
    # are spelled out as (depth, depth_bottleneck, stride) tuples there.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def resnet_v1_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v1 block of `naive` units.

    Args:
      scope: The scope of the block.
      base_depth: The depth of each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, applied by the last unit only; all
        preceding units use stride=1.

    Returns:
      A resnet_v1 block of `naive` units.
    """
    body = (num_units - 1) * [{'depth': base_depth, 'stride': 1}]
    tail = [{'depth': base_depth, 'stride': stride}]
    return resnet_utils.Block(scope, naive, body + tail)
def resnet_v1_block(scope, base_depth, num_units, stride, deformable_units=0, rate=1):
    """Helper function for creating a resnet_v1 bottleneck block.

    downsampling(stride=2) is done in each stage's first block!

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the FIRST
        unit (the stock slim helper strides the last unit instead). All
        other units have stride=1.
      deformable_units: Number of trailing units built with deformable
        convolution enabled.
      rate: Atrous rate for the first unit; all later units use rate=1.

    Returns:
      A resnet_v1 bottleneck block.
    """
    assert num_units >= deformable_units
    # Remaining count of regular units; the deformable ones are appended
    # at the tail of the block.
    num_units -= deformable_units
    return resnet_utils.Block(
        scope, deformable_bottleneck, [{
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': stride,
            'rate': rate,
            'deformable': False
        }] + [{
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': 1,
            'rate': 1,
            'deformable': False
        }] * (num_units - 1) + [{
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': 1,
            # TODO: set all deformables's rate to be 2!
            'rate': 1,
            'deformable': True
        }] * (deformable_units))
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block.

    Args:
      scope: The scope of the block.
      base_depth: The depth of the bottleneck layer for each unit.
      num_units: The number of units in the block.
      stride: The stride of the block, implemented as a stride in the last
        unit. All other units have stride=1.

    Note: network depth = (total residual units) * 3 + 2 -- each residual
    unit contains three conv layers, plus one input and one output layer
    (e.g. ResNet-50: 50 = 16 * 3 + 2).

    Returns:
      A resnet_v2 bottleneck block.
    """
    regular = {
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1
    }
    strided = {
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride
    }
    return resnet_utils.Block(scope, bottleneck,
                              (num_units - 1) * [regular] + [strided])
def build_network(self, sess, is_training=True):
    """Build the full Faster R-CNN graph: ResNet trunk, RPN, ROI pooling
    and the classification / bbox-regression heads.

    Args:
      sess: a tf.Session (accepted but not used inside this method).
      is_training: toggles proposal-target sampling and trainable flags.

    Returns:
      (rois, cls_prob, bbox_pred).
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError
    # Freeze the first FIXED_BLOCKS stages; train the rest. block4 is
    # applied later, after ROI pooling (blocks[...:-1] here, blocks[-1:] below).
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(
                net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                global_pool=False, include_root_block=False,
                scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(
                net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                global_pool=False, include_root_block=False,
                scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    # NOTE(review): dead code -- this noise/SRM branch is disabled (if False)
    # and references Wcnn, which is not defined in this method.
    if False:
        with tf.variable_scope('noise'):
            #kernel = tf.get_variable('weights',
            #shape=[5, 5, 3, 3],
            #initializer=tf.constant_initializer(c))
            conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME', name='srm')
        self._layers['noise'] = conv
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            #srm_conv = tf.nn.tanh(conv, name='tanh')
            noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
            noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
            #net_sum=tf.concat(3,[net_conv4,noise_net])
            noise_conv4, _ = resnet_v1.resnet_v1(noise_net,
                                                 blocks[0:-1],
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 scope='noise')
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
            #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
        else:
            raise NotImplementedError
    # NOTE(review): dead code -- disabled noise-feature head (if False).
    if False:
        noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5,
                                               blocks[-1:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='noise')
    # Apply block4 on the pooled ROI features to get per-ROI descriptors.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    self._layers['fc7'] = fc7
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        #pdb.set_trace()
        #noise_fc7 = tf.reduce_mean(noise_fc7, axis=[1, 2])
        #bilinear_pool=compact_bilinear_pooling_layer(fc7,noise_fc7,2048*4,compute_size=16,sequential=False)
        #bilinear_pool=tf.reshape(bilinear_pool, [-1,2048*4])
        # Global average pooling over the spatial dims.
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        #pdb.set_trace()
        #noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes, weights_initializer=initializer,
        #trainable=is_training, activation_fn=None, scope='noise_cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')
    #with tf.variable_scope(self._resnet_scope, self._resnet_scope):
    # Average pooling done by reduce_mean
    #fc7 = tf.reduce_mean(fc7, axis=[1, 2])
    #fc_con=tf.concat(1,[fc7,noise_fc])
    #cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
    #trainable=False, activation_fn=None, scope='cls_score')
    #cls_score1=cls_score+10*noise_cls_score
    #cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
    #bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
    #trainable=False,
    #activation_fn=None, scope='bbox_pred')
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def __init__(self):
    """Graph inputs and constants for a 600-class HOI detection network
    (HICO-DET style) with an auxiliary binary interaction head."""
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Single image per batch, arbitrary H x W, 3 channels.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    # 64x64x3 spatial interaction pattern per human-object pair.
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
    # ROIs as (batch_idx, x1, y1, x2, y2).
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name = 'gt_class_HO')
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
    self.H_num = tf.placeholder(tf.int32)
    # Precomputed per-class loss weights for the 600 HOI classes --
    # presumably derived from training label frequencies; TODO confirm.
    self.HO_weight = np.array([
        9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923, 8.714437, 8.59822,
        12.977117, 6.2745423, 11.227917, 6.765012, 9.436157, 9.56762, 11.0675745, 11.530198,
        9.609821, 9.897503, 6.664475, 6.811699, 6.644726, 9.170454, 13.670264, 3.903943,
        10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912, 5.5245695, 9.7286825,
        8.997436, 10.699849, 9.601237, 11.965516, 9.192927, 10.220277, 6.056692, 7.734048,
        8.42324, 6.586457, 6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238,
        8.071842, 10.431585, 12.417501, 11.530198, 11.227917, 4.0678477, 8.854023, 12.571651,
        8.225684, 10.996116, 11.0675745, 10.100731, 7.0376034, 7.463688, 12.571651, 14.363411,
        5.4902234, 11.0675745, 14.363411, 8.45805, 10.269067, 9.820116, 14.363411, 11.272368,
        11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117, 9.300817, 10.032678,
        12.571651, 10.114916, 10.471591, 13.264799, 14.363411, 8.01953, 10.412168, 9.644913,
        9.981384, 7.2197933, 14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472,
        12.060826, 8.862153, 9.799063, 12.753973, 12.753973, 10.412168, 10.8976755, 10.471591,
        12.571651, 9.519224, 6.207762, 12.753973, 6.60636, 6.2896967, 4.5198326, 9.7887,
        13.670264, 11.878505, 11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679,
        9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381, 11.144535, 13.264799,
        5.888291, 11.227917, 10.779892, 7.643191, 11.105314, 9.414651, 11.965516, 14.363411,
        12.28397, 9.909063, 8.94731, 7.0330057, 8.129001, 7.2817025, 9.874775, 9.758241,
        11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799, 9.699972, 11.878505,
        8.260853, 7.1437693, 6.9321113, 6.990665, 8.8104515, 11.655361, 13.264799, 4.515912,
        9.897503, 11.418972, 8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776,
        12.417501, 0.6271591, 12.060826, 12.060826, 12.166186, 5.2946343, 11.318889, 9.8308115,
        8.016022, 9.198626, 10.8976755, 13.670264, 11.105314, 14.363411, 9.653881, 9.503599,
        12.753973, 5.80546, 9.653881, 9.592727, 12.977117, 13.670264, 7.995224, 8.639826,
        12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597, 12.417501, 11.47304,
        10.451388, 8.95624, 10.996116, 11.144535, 11.031207, 13.670264, 13.670264, 6.397866,
        7.513285, 9.981384, 11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451,
        12.977117, 9.609821, 8.601359, 9.055143, 11.965516, 11.105314, 13.264799, 5.8201604,
        10.451388, 9.944571, 7.7855496, 14.363411, 8.5463, 13.670264, 7.9288645, 5.7561946,
        9.075144, 9.0701065, 5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264,
        8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537, 4.7828183, 9.519224,
        7.6492405, 8.031909, 7.8180614, 4.451856, 10.045923, 10.83705, 13.264799, 13.670264,
        4.5245686, 14.363411, 10.556748, 10.556748, 14.363411, 13.670264, 14.363411, 8.037262,
        8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135, 11.965516, 10.032678,
        5.9017305, 9.738439, 12.977117, 11.105314, 10.725825, 9.080208, 11.272368, 14.363411,
        14.363411, 13.264799, 6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826,
        9.488214, 5.4571533, 10.129305, 10.579222, 12.571651, 11.965516, 6.237189, 9.428937,
        9.618479, 8.620408, 11.590822, 11.655361, 9.968962, 10.8080635, 10.431585, 14.363411,
        3.796231, 12.060826, 10.302968, 9.551227, 8.75394, 10.579222, 9.944571, 14.363411,
        6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264, 11.724354, 9.993963,
        8.230013, 9.100721, 10.374427, 7.865129, 6.514087, 14.363411, 11.031207, 11.655361,
        12.166186, 7.419324, 9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144,
        9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 5.9870906, 9.269661, 11.878505,
        11.227917, 13.670264, 8.339964, 7.6763024, 10.471591, 10.451388, 13.670264, 11.185357,
        10.032678, 9.313555, 12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451,
        8.965248, 10.451388, 7.0609145, 10.579222, 13.264799, 10.49221, 8.978916, 7.124196,
        10.602211, 8.9743395, 7.77862, 8.073695, 9.644913, 9.339531, 8.272101, 4.794418,
        9.016304, 8.012526, 10.674532, 14.363411, 7.995224, 12.753973, 5.5157638, 8.934066,
        10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411, 12.753973, 12.417501,
        8.59197, 10.513264, 10.338059, 14.363411, 7.7079706, 14.363411, 13.264799, 13.264799,
        10.752493, 14.363411, 14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117,
        11.798462, 9.968962, 12.753973, 11.47304, 11.227917, 7.6763024, 10.779892, 11.185357,
        14.363411, 7.369478, 14.363411, 9.944571, 10.779892, 10.471591, 9.54313, 9.148476,
        10.285873, 10.412168, 12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 12.977117,
        11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625, 10.9622135, 14.363411,
        13.264799, 9.575919, 9.080208, 11.878505, 7.1863923, 9.366199, 8.854023, 9.874775,
        8.2857685, 13.670264, 11.878505, 12.166186, 7.616999, 9.44343, 8.288065, 8.8104515,
        8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 7.058223, 5.0138307, 12.753973,
        10.173757, 9.863602, 11.318889, 9.54313, 10.996116, 12.753973, 7.8339925, 7.569945,
        7.4427395, 5.560738, 12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053,
        7.8849015, 7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411, 10.8976755, 5.908519,
        10.269067, 9.176025, 9.852551, 9.488214, 8.90809, 8.537411, 9.653881, 8.662968,
        11.965516, 10.143904, 14.363411, 14.363411, 9.407584, 5.281472, 11.272368, 12.060826,
        14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411, 12.060826, 11.965516,
        10.9622135, 10.9622135, 14.363411, 5.658909, 8.934066, 12.571651, 8.614018, 11.655361,
        13.264799, 10.996116, 13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673,
        10.513264, 8.7430105, 10.338059, 13.264799, 6.878481, 9.065094, 8.87035, 14.363411,
        9.92076, 6.5872955, 10.32036, 14.363411, 9.944571, 11.798462, 10.9622135, 11.031207,
        7.652888, 4.334878, 13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 14.363411,
        13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432, 8.916674, 8.59509,
        14.363411, 7.376845, 11.798462, 11.530198, 11.318889, 11.185357, 5.0664344, 11.185357,
        9.372978, 10.471591, 9.6629305, 11.367679, 8.73579, 9.080208, 11.724354, 5.04781,
        7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 12.166186, 7.215852, 4.374113,
        11.655361, 11.530198, 14.363411, 6.4993753, 11.031207, 8.344818, 10.513264, 10.032678,
        14.363411, 14.363411, 4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
    ], dtype = 'float32').reshape(1,600)
    # Two-class weight for the binary interaction head -- presumably
    # log-inverse class frequencies; TODO confirm derivation.
    self.binary_weight = np.array([1.6094379124341003, 0.22314355131420976], dtype = 'float32').reshape(1,2)
    self.num_classes = 600 # HOI
    self.num_binary = 2 # existence (0 or 1) of HOI
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)
    # ResNet-50 block specs; TF 1.1.0 lacks resnet_v1_block, so the units
    # are spelled out as (depth, depth_bottleneck, stride) tuples there.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
    else: # we use tf 1.2.0 here, Resnet-50
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), # a resnet_v1 bottleneck block
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), # feature former
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def build_network(self, sess, is_training=True):
    """Build the two-stream (RGB + SRM-noise) Faster R-CNN graph.

    Constructs the shared ResNet trunk, an RPN, and a second "noise" stream
    whose input is the image filtered by fixed SRM high-pass kernels; the two
    ROI features are fused with compact bilinear pooling for classification.

    Args:
        sess: TF session (unused here; kept for the caller's interface).
        is_training: whether batch-norm / dropout run in training mode.

    Returns:
        (rois, cls_prob, bbox_pred) tensors.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        #initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer = tf.contrib.layers.xavier_initializer()
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    # Freeze the first FIXED_BLOCKS blocks (is_training=False) and train the rest.
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    # Fixed SRM residual (high-pass) kernels: three 5x5 filters commonly used
    # for image-forensics noise extraction; normalized by 12, 4 and 2.
    c = np.zeros((3, 5, 5))
    c[0] = [[-1, 2, -2, 2, -1],
            [2, -6, 8, -6, 2],
            [-2, 8, -12, 8, -2],
            [2, -6, 8, -6, 2],
            [-1, 2, -2, 2, -1]]
    c[0] = c[0] / 12
    c[1][1][1] = -1
    c[1][1][2] = 2
    c[1][1][3] = -1
    c[1][2][1] = 2
    c[1][2][2] = -4
    c[1][2][3] = 2
    c[1][3][1] = -1
    c[1][3][2] = 2
    c[1][3][3] = -1
    c[1] = c[1] / 4
    c[2][2][1] = 1
    c[2][2][2] = -2
    c[2][2][3] = 1
    c[2] = c[2] / 2
    # Replicate each SRM kernel across the 3 input channels: Wcnn is HWIO
    # (5, 5, in=3, out=3), so every output channel applies one kernel to all
    # input channels.
    Wcnn = np.zeros((5, 5, 3, 3))
    for i in range(3):
        #k=i%10+1
        #Wcnn[i]=[c[3*k-3],c[3*k-2],c[3*k-1]]
        Wcnn[:, :, 0, i] = c[i]
        Wcnn[:, :, 1, i] = c[i]
        Wcnn[:, :, 2, i] = c[i]
    if True:
        with tf.variable_scope('noise'):
            #kernel = tf.get_variable('weights',
            #                         shape=[5, 5, 3, 3],
            #                         initializer=tf.constant_initializer(c))
            # Fixed (non-trainable) convolution with the SRM kernels.
            conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME', name='srm')
        self._layers['noise'] = conv
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            #srm_conv = tf.nn.tanh(conv, name='tanh')
            # Root block of the noise stream (conv1 + pool1), mirroring the
            # RGB stream's stem, then the shared block definitions under the
            # separate 'noise' variable scope.
            noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
            noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
            #net_sum=tf.concat(3,[net_conv4,noise_net])
            noise_conv4, _ = resnet_v1.resnet_v1(noise_net, blocks[0:-1],
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 scope='noise')
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
            self._layers['pool5'] = pool5
            #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
        else:
            raise NotImplementedError
    if True:
        # Crop the same ROIs from the noise stream and run its conv5 block.
        noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5, blocks[-1:],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope='noise')
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    self._layers['fc7'] = fc7
    with tf.variable_scope('noise_pred'):
        # Fuse RGB and noise ROI features with compact bilinear pooling,
        # then signed-sqrt + L2 normalization before classification.
        bilinear_pool = compact_bilinear_pooling_layer(fc7, noise_fc7, 2048 * 8,
                                                       compute_size=16, sequential=False)
        fc7 = tf.Print(fc7, [tf.shape(fc7)], message='Value of %s' % 'fc', summarize=4, first_n=1)
        bilinear_pool = tf.reshape(bilinear_pool, [-1, 2048 * 8])
        bilinear_pool = tf.Print(bilinear_pool, [tf.shape(bilinear_pool)],
                                 message='Value of %s' % 'Blinear', summarize=4, first_n=1)
        # Signed square root; +1e-12 guards the sqrt gradient at zero.
        bilinear_pool = tf.multiply(tf.sign(bilinear_pool), tf.sqrt(tf.abs(bilinear_pool) + 1e-12))
        bilinear_pool = tf.nn.l2_normalize(bilinear_pool, dim=1)
        noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes,
                                               weights_initializer=tf.contrib.layers.xavier_initializer(),
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
        # Average pooling done by reduce_mean; bbox regression uses only the
        # RGB stream's pooled feature.
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = noise_cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def build_network(self, sess, is_training=True):
    """Build the standard single-stream ResNet Faster R-CNN graph.

    Constructs the ResNet trunk up to conv4, the RPN, ROI cropping, the conv5
    head, and the classification / bbox-regression outputs.

    Args:
        sess: TF session (unused here; kept for the caller's interface).
        is_training: whether batch-norm runs in training mode and proposal
            targets are computed.

    Returns:
        (rois, cls_prob, bbox_pred) tensors.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    # Freeze the first FIXED_BLOCKS blocks (is_training=False) and train the rest.
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(
                net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(
                net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        else:
            raise NotImplementedError
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def __init__(self, model_name):
    """Set up placeholders and configuration for an HOI-detection network.

    Args:
        model_name: model identifier string; substrings like '_t4_', '_t5_'
            and 'unique_weights' toggle class counts and extra blocks.
    """
    self.model_name = model_name
    # Bookkeeping dicts for summaries / losses collected during graph build.
    self.visualize = {}
    self.test_visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Input placeholders: one image per batch, human/object boxes as
    # [batch_idx, x1, y1, x2, y2] rows, and per-pair labels/masks.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
    # self.Hsp_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 24], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 24], name = 'H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 24], name = 'sp_mask')
    self.gt_compose = tf.placeholder(tf.float32, shape=[None, 222], name='gt_compose')
    self.gt_obj = tf.placeholder(tf.float32, shape=[None, 80], name='gt_obj')
    self.H_num = tf.placeholder(tf.int32)
    self.image_id = tf.placeholder(tf.int32)
    # Model-name switches adjust the number of output classes.
    self.num_classes = 24
    if self.model_name.__contains__('_t4_'):
        self.num_classes = 222
    if self.model_name.__contains__('_t5_'):
        self.verb_num_classes = 21
        self.num_classes = 222
    self.num_fc = 1024
    # NOTE(review): this unconditionally overwrites the verb_num_classes = 21
    # set in the '_t5_' branch above — confirm that is intended.
    self.verb_num_classes = 24
    self.obj_num_classes = 80
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    # self.lr = tf.placeholder(tf.float32)
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
    if self.model_name.__contains__('unique_weights'):
        print("unique_weights2")
        # NOTE(review): resnet_v1_block is only imported in the tf >= 1.2
        # branch above; under tf 1.1.0 this line would raise NameError.
        self.blocks.append(resnet_v1_block('block6', base_depth=512, num_units=3, stride=1))
    # Per-class loss weights (24 interaction classes).
    # remove 3, 17 22, 23 27
    self.HO_weight = np.array([3.3510249, 3.4552405, 4.0257854, 4.088436,
                               3.4370995, 3.85842, 4.637334, 3.5487218,
                               3.536237, 2.5578923, 3.342811, 3.8897269,
                               4.70686, 3.3952892, 3.9706533, 4.504736,
                               1.4873443, 3.700363, 4.1058283, 3.6298118,
                               5.0808263, 1.520838, 3.3888445, 3.9899964], dtype='float32').reshape(1, 24)
    self.H_weight = np.array([4.0984106, 4.102459, 4.0414762, 4.0414762,
                              3.9768186, 4.23686, 5.3542085, 3.723717,
                              3.4699364, 2.4587274, 3.7167964, 4.08836,
                              5.050695, 3.9077065, 4.534647, 3.4699364,
                              1.8585607, 3.9433942, 3.9433942, 4.3523254,
                              5.138182, 1.7807873, 4.080392, 4.5761204], dtype='float32').reshape(1, 24)
    self.reset_classes()
def __init__(self):
    """Set up placeholders and configuration for a part-based HOI network."""
    # Bookkeeping dicts for summaries / losses collected during graph build.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Input placeholders: one image per batch; boxes are
    # [batch_idx, x1, y1, x2, y2] rows.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    # Ten human body-part boxes.
    self.Part0 = tf.placeholder(tf.float32, shape=[None, 5], name='Part0_boxes')
    self.Part1 = tf.placeholder(tf.float32, shape=[None, 5], name='Part1_boxes')
    self.Part2 = tf.placeholder(tf.float32, shape=[None, 5], name='Part2_boxes')
    self.Part3 = tf.placeholder(tf.float32, shape=[None, 5], name='Part3_boxes')
    self.Part4 = tf.placeholder(tf.float32, shape=[None, 5], name='Part4_boxes')
    self.Part5 = tf.placeholder(tf.float32, shape=[None, 5], name='Part5_boxes')
    self.Part6 = tf.placeholder(tf.float32, shape=[None, 5], name='Part6_boxes')
    self.Part7 = tf.placeholder(tf.float32, shape=[None, 5], name='Part7_boxes')
    self.Part8 = tf.placeholder(tf.float32, shape=[None, 5], name='Part8_boxes')
    self.Part9 = tf.placeholder(tf.float32, shape=[None, 5], name='Part9_boxes')
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name='gt_binary_label')
    # NOTE(review): same placeholder name as gt_binary_label above; TF will
    # uniquify the op name — confirm the duplicate name is intended.
    self.gt_binary_label_10v = tf.placeholder(tf.float32, shape=[None, 10, 2], name='gt_binary_label')
    self.num_vec = 10
    self.H_num = tf.placeholder(tf.int32)
    # Binary (interaction / no-interaction) class weights.
    self.binary_weight = np.array(
        [1.6094379124341003, 0.22314355131420976], dtype='float32').reshape(1, 2)
    self.num_classes = 600  # HOI
    self.num_binary = 2  # existence (0 or 1) of HOI
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)
    if tf.__version__ == '1.1.0':
        self.blocks = [
            resnet_utils.Block('block1', resnet_v1.bottleneck,
                               [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', resnet_v1.bottleneck,
                               [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', resnet_v1.bottleneck,
                               [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', resnet_v1.bottleneck,
                               [(2048, 512, 1)] * 3),
            resnet_utils.Block('block5', resnet_v1.bottleneck,
                               [(2048, 512, 1)] * 3)
        ]
    else:  # we use tf 1.2.0 here, Resnet-50
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),  # a resnet_v1 bottleneck block
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),  # feature former
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
            resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)
        ]
def _decide_blocks(self): # choose different blocks for different number of layers if self._num_layers == 50: if tf.__version__ == '1.1.0': self._blocks = [ resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]), resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3) ] else: from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block self._blocks = [ resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1) ] elif self._num_layers == 101: self._blocks = [ resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), # use stride 1 for the last conv4 layer resnet_v1_block('block3', base_depth=256, num_units=23, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1) ] elif self._num_layers == 152: self._blocks = [ resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=8, stride=2), # use stride 1 for the last conv4 layer resnet_v1_block('block3', base_depth=256, num_units=36, stride=1), resnet_v1_block('block4', base_depth=512, num_units=3, stride=1) ] else: # other numbers are not supported raise NotImplementedError
def build_network(self, sess, is_training=True):
    """Build a ResNet-101 Faster R-CNN graph with L2-regularized heads.

    Unlike the other variants in this file, the block list is fixed to the
    101-layer configuration, the root block is built by resnet_v1 itself
    (include_root_block=True), and the RPN/head scopes attach an explicit
    weight-decay regularizer.

    Args:
        sess: TF session (unused here; kept for the caller's interface).
        is_training: whether batch-norm runs in training mode and proposal
            targets are computed.

    Returns:
        (rois, cls_prob, bbox_pred) tensors.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Stride-1 in the last conv4 unit keeps the feature resolution.
        resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    # Freeze the first FIXED_BLOCKS blocks (is_training=False) and train the rest.
    if cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net, _ = resnet_v1.resnet_v1(self._image, blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=True,
                                         scope='resnet_v1_101')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(
                net, blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                global_pool=False,
                include_root_block=False,
                scope='resnet_v1_101')
    else:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv5, _ = resnet_v1.resnet_v1(self._image, blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=True,
                                               scope='resnet_v1_101')
    self._act_summaries.append(net_conv5)
    self._layers['conv5_3'] = net_conv5
    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(
                               cfg.TRAIN.WEIGHT_DECAY)):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        # rpn = self._conv_layer_shape(net, [3, 3], 512, "rpn_conv/3x3", initializer, train)
        if cfg.TRAIN.BIAS_DECAY:
            biases_regularizer = None
        else:
            biases_regularizer = tf.no_regularizer
        rpn = slim.conv2d(net_conv5, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer,
                          biases_regularizer=biases_regularizer,
                          biases_initializer=tf.constant_initializer(0.0),
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        # NOTE(review): this variant sizes RPN outputs as _num_scales * 6 /
        # * 12 (vs _num_anchors * 2 / * 4 elsewhere) — presumably 3 ratios
        # per scale; confirm against the anchor generation code.
        rpn_cls_score = slim.conv2d(
            rpn, self._num_scales * 6, [1, 1], trainable=is_training,
            weights_initializer=initializer,
            biases_regularizer=biases_regularizer,
            biases_initializer=tf.constant_initializer(0.0),
            padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_scales * 6, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(
            rpn, self._num_scales * 12, [1, 1], trainable=is_training,
            weights_initializer=initializer,
            biases_regularizer=biases_regularizer,
            biases_initializer=tf.constant_initializer(0.0),
            padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv5, rois, "pool5")
        else:
            raise NotImplementedError
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5, blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope='resnet_v1_101')
    with tf.variable_scope('resnet_v1_101', 'resnet_v1_101',
                           regularizer=tf.contrib.layers.l2_regularizer(
                               cfg.TRAIN.WEIGHT_DECAY)):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(
            fc7, self._num_classes,
            weights_initializer=initializer,
            trainable=is_training,
            biases_regularizer=biases_regularizer,
            activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(
            fc7, self._num_classes * 4,
            weights_initializer=initializer_bbox,
            trainable=is_training,
            biases_regularizer=biases_regularizer,
            activation_fn=None, scope='bbox_pred')
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def __init__(self, num_sgnodes=17, num_agnodes=6 + 17, is_training=True,
             use_skebox=False, use_bodypart=False, use_pm=False, use_u=False,
             use_sg=False, use_sg_att=False, use_ag=False, use_ag_att=False,
             use_binary=False, use_Hsolo=False):
    """Configure a graph-convolution HOI network.

    Args:
        num_sgnodes: node count of the spatial graph.
        num_agnodes: node count of the appearance graph.
        is_training: training mode flag (controls dropout keep_prob).
        use_*: feature/branch toggles, stored one-to-one on self below.
    """
    # Control the network architecture
    self.use_skebox = use_skebox  # whether use skeleton box
    self.use_bp = use_bodypart  # whether use body part
    self.use_pm = use_pm  # whether use pose map
    self.use_u = use_u  # whether use union box
    self.use_sg = use_sg  # whether use spatial graph
    self.use_sg_att = use_sg_att  # whether use spatial graph attention
    self.use_ag = use_ag  # whether use appearance graph attention
    self.use_ag_att = use_ag_att  # whether use appearance attention
    self.use_binary = use_binary  # whether train binary module
    self.use_Hsolo = use_Hsolo
    # Annotation feed
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 1], name='gt_binary_label')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name='gt_class_HO')
    # Training utils
    self.predictions = {}
    self.losses = {}
    self.lr = tf.placeholder(tf.float32)
    self.num_binary = 1  # existence of HOI (0 or 1)
    self.num_classes = 600
    self.is_training = is_training
    self.keep_prob = 0.8 if self.is_training else 1
    # Training data feed: one image per batch; boxes are
    # [batch_idx, x1, y1, x2, y2] rows.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.head = tf.placeholder(tf.float32, shape=[1, None, None, 1024], name='head')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    self.U_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='U_boxes')
    self.skeboxes = tf.placeholder(tf.float32, shape=[None, 17, 5], name='part_boxes')
    self.bodyparts = tf.placeholder(tf.float32, shape=[None, 6, 5], name='bodypart_boxes')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
    self.semantic = tf.placeholder(tf.float32, shape=[None, 768], name='semantic')
    self.H_num = tf.placeholder(tf.int32)
    self.H_num_neg = tf.placeholder(tf.int32)
    # ResNet 50 Network
    self.scope = 'resnet_v1_50'
    self.num_fc = 1024
    self.num_fc2 = 1024
    self.stride = [16, ]
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck,
                                          [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck,
                                          [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck,
                                          [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck,
                                          [(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck,
                                          [(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
    # GCN setting
    # Spatial GCN
    self.num_SGnodes = num_sgnodes
    self.SGraph = Graph(num_node=self.num_SGnodes)
    self.ori_As = tf.convert_to_tensor(self.SGraph.ori_A.astype(np.float32))
    self.As = tf.convert_to_tensor(self.SGraph.A.astype(np.float32))  # list for partition, N, V, V
    self.spatial_kernel_size = self.As.shape[0]
    self.SGinput = tf.placeholder(tf.float32,
                                  shape=[None, self.num_SGnodes, 1, 5, 1],
                                  name='Gnodes')  # [N, V, T, C, M] = [N, 19, 1, 3, 1]
    # Appearance GCN
    self.num_AGnodes = num_agnodes
    self.AGraph = Graph(num_node=self.num_AGnodes)
    self.ori_Aa = tf.convert_to_tensor(self.AGraph.ori_A.astype(np.float32))
    self.Aa = tf.convert_to_tensor(self.AGraph.A.astype(np.float32))  # list for partition, N, V, V
    self.binary_type = 0
def build_network(self, sess, is_training=True):
    """Build the detection graph: noise-constrained stem, ResNet backbone with CBAM,
    RPN, RCNN heads, and (when cfg.USE_MASK) a mask-prediction head.

    Returns (rois, cls_prob, bbox_pred, mask_predictions) when cfg.USE_MASK is True,
    otherwise (rois, cls_prob, bbox_pred).
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    # Noise stem: a learned 5x5 "constrained" conv applied to self.noise before the backbone.
    with tf.variable_scope('noise'):
        conv = slim.conv2d(self.noise, num_outputs=3, kernel_size=[5, 5], stride=1, padding='SAME',
                           activation_fn=None, trainable=is_training, scope='constrained_conv')
        self._layers['noise'] = conv

    # Backbone: run blocks one at a time so the intermediate feature maps C_1..C_4 can be kept.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        # assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        C_1 = self.build_base(conv)
        C_2, _ = resnet_v1.resnet_v1(C_1, blocks[0:1], global_pool=False, include_root_block=False,
                                     scope=self._resnet_scope)
        # net=self.cbam_module(inputs=net,name="cbam_layer_1")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_3, _ = resnet_v1.resnet_v1(C_2, blocks[1:2], global_pool=False, include_root_block=False,
                                     scope=self._resnet_scope)
        # net = self.cbam_module(inputs=net, name="cbam_layer_2")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_4, end_point = resnet_v1.resnet_v1(C_3, blocks[2:3], global_pool=False, include_root_block=False,
                                             scope=self._resnet_scope)
        # mask_C_4 = self.cbam_module(inputs=C_4, name="C_4")
    self.end_point = end_point
    self._act_summaries.append(C_4)
    self._layers['head'] = C_4
    self._layers['C1'] = C_1
    self._layers['C2'] = C_2
    self._layers['C3'] = C_3

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn: 3x3 conv followed by CBAM attention, then 1x1 cls/bbox heads
        rpn1 = slim.conv2d(C_4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                           scope="rpn_conv/3x3")
        self._layers['rpn1'] = rpn1
        rpn = self.cbam_module(inputs=rpn1, name="rpn_conv1")
        self._layers['rpn'] = rpn
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        self._layers['rpn_cls_score'] = rpn_cls_score
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(C_4, rois, "pool5")
            # pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
        else:
            raise NotImplementedError

    # Per-ROI feature extraction with the final ResNet block.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, end_point1 = resnet_v1.resnet_v1(pool5, blocks[-1:], global_pool=False, include_root_block=False,
                                              scope=self._resnet_scope)
        self._layers['fc7'] = fc7
        # self._layers['pool5'] =pool5
        self.end_point1 = end_point1

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Global average pooling over the spatial dims, then classification / regression heads.
        cls_fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(cls_fc7, self._num_classes, weights_initializer=initializer,
                                         trainable=is_training, activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        box_fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        bbox_pred = slim.fully_connected(box_fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                         trainable=is_training, activation_fn=None, scope='bbox_pred')

    if cfg.USE_MASK is True:
        # Mask branch: fuse backbone C_3 with the attended RPN map, crop mask proposals,
        # run the last ResNet block under a separate 'mask_conv' scope, then upsample to logits.
        with tf.variable_scope('feature_fuse', 'feature_fuse'):
            mask_fuse = C_3 * 0.5 + rpn * 0.5  # equal-weight fusion of the two 1024-d maps
            feature_fuse = slim.conv2d(mask_fuse, 1024, [1, 1], padding='VALID', trainable=is_training,
                                       weights_initializer=initializer, scope='mask_fuse')
        mask_box, indices = self._proposal_mask_layer(cls_prob, bbox_pred, rois, 'mask_proposal')
        mask_pool5 = self._crop_pool_layer(feature_fuse, mask_box, "mask_pool5")
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            mask_fc7, _ = resnet_v1.resnet_v1(mask_pool5, blocks[-1:], global_pool=False,
                                              include_root_block=False, scope='mask_conv')
            self._act_summaries.append(mask_fc7)
        with tf.variable_scope('mask_predict', 'mask_predict'):
            # 2x deconv -> 1x1 conv + BN + ReLU -> 1x1 conv producing 2-channel (bg/fg) logits.
            upsampled_features = slim.conv2d_transpose(mask_fc7, 256, 2, 2, activation_fn=None)
            self._act_summaries.append(upsampled_features)
            upsampled_features = slim.conv2d(upsampled_features, 64, [1, 1], normalizer_fn=slim.batch_norm,
                                             activation_fn=None, padding='VALID')
            self._act_summaries.append(upsampled_features)
            upsampled_features = slim.batch_norm(upsampled_features, activation_fn=None)
            self._act_summaries.append(upsampled_features)
            upsampled_features = tf.nn.relu(upsampled_features)
            self._act_summaries.append(upsampled_features)
            mask_predictions = slim.conv2d(upsampled_features, num_outputs=2, activation_fn=None,
                                           kernel_size=[1, 1], padding='VALID')
            self._act_summaries.append(mask_predictions)
            # Channel 1 is foreground; keep raw logits and both softmax maps.
            self._predictions["mask_out"] = tf.expand_dims(mask_predictions[:, :, :, 1], 3)
            mask_softmax = tf.nn.softmax(mask_predictions)
            self._predictions["mask_softmaxbg"] = tf.expand_dims(mask_softmax[:, :, :, 0], 3)
            self._predictions["mask_softmaxfg"] = tf.expand_dims(mask_softmax[:, :, :, 1], 3)
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._predictions["mask_pred"] = mask_predictions
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred, mask_predictions
    else:
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred
def __init__(self):
    """Set up placeholders, bookkeeping dicts and ResNet-50 block definitions
    for the PaSta (part states) / verb recognition network."""
    # Bookkeeping containers filled in during graph construction / training.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}

    # Input placeholders; *_boxes rows are [batch_idx, x1, y1, x2, y2].
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')  # scene stream
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')  # Human stream
    self.P_boxes = tf.placeholder(tf.float32, shape=[None, 10, 5], name='P_boxes')  # PaSta stream (10 part boxes)
    self.gt_verb = tf.placeholder(tf.float32, shape=[None, 80], name='gt_class_verb')  # target verb
    self.H_num = tf.placeholder(tf.int32)  # number of humans in the batch
    self.verb_weight = np.array(verb80, dtype='float32').reshape(1, -1)  # per-verb loss weights

    # Output sizes: one head per body-part group plus the 80-way verb head.
    self.num_classes = 80  # HOI
    self.num_pasta0 = 12  # pasta0 ankle
    self.num_pasta1 = 10  # pasta1 knee
    self.num_pasta2 = 5  # pasta2 hip
    self.num_pasta3 = 31  # pasta3 hand
    self.num_pasta4 = 5  # pasta4 shoulder
    self.num_pasta5 = 13  # pasta5 head
    self.num_fc = 1024

    # ResNet-50 backbone configuration.
    self.scope = 'resnet_v1_50'
    self.stride = [
        16,
    ]
    self.lr = tf.placeholder(tf.float32)  # learning rate, fed per step
    if tf.__version__ == '1.1.0':
        # Old TF: spell the bottleneck units out as (depth, depth_bottleneck, stride) tuples.
        self.blocks = [
            resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
            resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:  # we use tf 1.2.0 here, Resnet-50
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),  # a resnet_v1 bottleneck block
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),  # feature former
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
            resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)
        ]
def __init__(self, in_channels=3, num_classes=0, num_nodes=17 + 2, edge_importance_weighting=True,
             is_training=True, num_fc=1024, posetype=1, bi_posegraph=False, bodypart=False, binary=False,
             posemap=False, posegraph=False, semantic=False, data_bn=True):
    """Set up placeholders, ResNet-50 blocks and ST-GCN settings for the pose-graph HOI model.

    Args:
        in_channels: channels per graph node (e.g. 3 for x/y/score).
        num_classes: unused here for sizing (HOI count is fixed at 600 below).
        num_nodes: graph node count (17 keypoints + 2 extra nodes by default).
        edge_importance_weighting: ST-GCN edge weighting flag (stored by caller conventions).
        is_training: training-mode flag; selects dropout keep probabilities.
        num_fc: width of the second fully-connected stage.
        posetype: 1 -> 2-channel spatial map, otherwise 3-channel.
        bi_posegraph / bodypart / binary / posemap / posegraph / semantic: branch switches.
        data_bn: whether to batch-normalize graph input data.
    """
    # Bookkeeping containers.
    self.predictions = {}
    self.train_summaries = []
    self.losses = {}
    self.lr = tf.placeholder(tf.float32)  # learning rate, fed per step
    self.num_binary = 1  # existence of HOI (0 or 1)
    self.num_classes = 600  # number of HOI categories

    # Ground-truth feeds.
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 1], name='gt_binary_label')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name='gt_class_HO')

    self.is_training = is_training
    if self.is_training:
        self.keep_prob = cfg.TRAIN_DROP_OUT_BINARY
        self.keep_prob_tail = .5
    else:
        # No dropout at test time.
        self.keep_prob = 1
        self.keep_prob_tail = 1

    # Input placeholders; *_boxes rows are [batch_idx, x1, y1, x2, y2].
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.head = tf.placeholder(tf.float32, shape=[1, None, None, 1024], name='head')  # precomputed backbone feature
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    self.partboxes = tf.placeholder(tf.float32, shape=[None, 17, 5], name='part_boxes')  # one box per keypoint
    self.semantic = tf.placeholder(tf.float32, shape=[None, 1024], name='semantic_feat')
    self.H_num = tf.placeholder(tf.int32)  # count of positive pairs at the front of the batch

    # Control the network architecture
    self.bodypart = bodypart
    self.binary = binary
    self.posemap = posemap
    self.posegraph = posegraph
    self.bi_posegraph = bi_posegraph
    self.posetype = posetype
    self.semantic_flag = semantic
    if self.posetype == 1:
        self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name='sp')
    else:
        self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')

    # ResNet 50 Network
    self.scope = 'resnet_v1_50'
    self.num_fc = 1024
    self.num_fc2 = num_fc
    self.stride = [
        16,
    ]
    if tf.__version__ == '1.1.0':
        # Old TF: spell the bottleneck units out as (depth, depth_bottleneck, stride) tuples.
        self.blocks = [
            resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
            resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
            resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)
        ]

    # GCN setting
    self.num_nodes = num_nodes
    self.c = in_channels
    self.data_bn = data_bn
    self.strategy = 'spatial'  # ST-GCN partition strategy
    self.graph = Graph(strategy=self.strategy)
    self.A = tf.convert_to_tensor(self.graph.A.astype(
        np.float32))  # [None, num_nodes, num_nodes]
    self.spatial_kernel_size = self.A.shape[0]
    # [N, C, T, V, M] = [N, 3, 1, 19, 1]
    self.Gnodes = tf.placeholder(
        tf.float32, shape=[None, self.c, 1, self.num_nodes, 1], name='Gnodes')
    # ST_GCN
    self.depth_st_gcn_networks = 10  # number of stacked st-gcn layers
def build_network(self, sess, is_training=True, ver=''):
    """Build a two-stream detector (RGB + noise) whose ROI features are merged
    with compact bilinear pooling, followed by RPN and RCNN heads.

    Args:
        sess: TF session (unused here; kept for the caller's interface).
        is_training: controls initializer choice, BN/dropout mode, and RPN target layers.
        ver: suffix appended to block scope names so two towers can coexist.

    Returns (rois, cls_prob, bbox_pred).
    """
    # select initializers
    if cfg.FLAGS.initializer:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck

    # choose different blocks for different number of layers
    def resnet_v1_block(scope, bottleneck, base_depth, num_units, stride):
        # Local re-implementation of slim's resnet_v1_block using dict-style unit specs:
        # (num_units - 1) stride-1 units followed by one unit with the given stride.
        return resnet_utils.Block(scope, bottleneck, [{
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': 1
        }] * (num_units - 1) + [{
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': stride
        }])

    if self._num_layers == 50:
        blocks = [resnet_v1_block('block1' + ver, bottleneck, base_depth=64, num_units=3, stride=2),
                  resnet_v1_block('block2' + ver, bottleneck, base_depth=128, num_units=4, stride=2),
                  # use stride 1 for the last conv4 layer
                  resnet_v1_block('block3' + ver, bottleneck, base_depth=256, num_units=6, stride=1),
                  resnet_v1_block('block4' + ver, bottleneck, base_depth=512, num_units=3, stride=1),
                  ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1' + ver, bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2' + ver, bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3' + ver, bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4' + ver, bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1' + ver, bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2' + ver, bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3' + ver, bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4' + ver, bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    assert (0 <= cfg.FLAGS.fixed_blocks < 4)
    # Three freezing regimes: all conv blocks frozen, a prefix frozen, or nothing frozen.
    # The noise tower shares variables with the RGB tower via scope reuse.
    if cfg.FLAGS.fixed_blocks == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.FLAGS.fixed_blocks], global_pool=False,
                                               include_root_block=False, scope=self._resnet_scope)
            net_noise = self.build_base(ver='n', init=initializer)
            net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise, blocks[0:cfg.FLAGS.fixed_blocks],
                                                     global_pool=False, include_root_block=False,
                                                     scope=self._resnet_scope, reuse=True)
    elif cfg.FLAGS.fixed_blocks > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net, blocks[0:cfg.FLAGS.fixed_blocks], global_pool=False,
                                         include_root_block=False, scope=self._resnet_scope,
                                         reuse=tf.AUTO_REUSE)
            net_noise = self.build_base(ver='n', init=initializer)
            net_noise, _ = resnet_v1.resnet_v1(net_noise, blocks[0:cfg.FLAGS.fixed_blocks], global_pool=False,
                                               include_root_block=False, scope=self._resnet_scope,
                                               reuse=tf.AUTO_REUSE)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[cfg.FLAGS.fixed_blocks:-1], global_pool=False,
                                               include_root_block=False, scope=self._resnet_scope,
                                               reuse=tf.AUTO_REUSE)
            # NOTE(review): the RGB stream continues with blocks[fixed_blocks:-1] above, but the
            # noise stream re-runs blocks[0:fixed_blocks] it already passed through in the frozen
            # scope — this looks like a copy-paste slice; confirm blocks[fixed_blocks:-1] was intended.
            net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise, blocks[0:cfg.FLAGS.fixed_blocks],
                                                     global_pool=False, include_root_block=False,
                                                     scope=self._resnet_scope, reuse=tf.AUTO_REUSE)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net, blocks[0:-1], global_pool=False,
                                               include_root_block=False, scope=self._resnet_scope,
                                               reuse=tf.AUTO_REUSE)
            net_noise = self.build_base(ver='n', init=initializer)
            # NOTE(review): with fixed_blocks == 0, blocks[0:cfg.FLAGS.fixed_blocks] is an EMPTY list,
            # so the noise tower gets no ResNet blocks here — confirm blocks[0:-1] was intended.
            net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise, blocks[0:cfg.FLAGS.fixed_blocks],
                                                     global_pool=False, include_root_block=False,
                                                     scope=self._resnet_scope, reuse=tf.AUTO_REUSE)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.FLAGS.test_mode == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.FLAGS.test_mode == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        # rcnn
        if cfg.FLAGS.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
            pool5_forNoise = self._crop_pool_layer(net_conv4_noise, rois, "pool5n")
            # Compact Bilinear Pooling: merge the RGB and noise ROI features into one vector.
            cbp = compact_bilinear_pooling_layer(pool5, pool5_forNoise, 1024)
            cbp_flat = slim.flatten(cbp, scope='cbp_flatten')
        else:
            raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7 = slim.fully_connected(cbp_flat, 4096, scope='fc6')

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        # fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
                                         trainable=is_training, activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                         trainable=is_training, activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred