def testEndPointsV1(self):
    """Verify the end-point names exposed by a tiny two-block v1 bottleneck net."""
    # (scope, base_depth, stride); both blocks have two units.
    blocks = [
        resnet_v1.resnet_v1_block(name, base_depth=depth, num_units=2, stride=stride)
        for name, depth, stride in (('block1', 1, 2), ('block2', 2, 1))
    ]
    inputs = create_test_input(2, 32, 16, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')

    # Only unit_1 of each block is expected to expose a 'shortcut' end point.
    block1_names = [
        'tiny/block1/unit_1/bottleneck_v1/shortcut',
        'tiny/block1/unit_1/bottleneck_v1/conv1',
        'tiny/block1/unit_1/bottleneck_v1/conv2',
        'tiny/block1/unit_1/bottleneck_v1/conv3',
        'tiny/block1/unit_2/bottleneck_v1/conv1',
        'tiny/block1/unit_2/bottleneck_v1/conv2',
        'tiny/block1/unit_2/bottleneck_v1/conv3',
    ]
    block2_names = [
        'tiny/block2/unit_1/bottleneck_v1/shortcut',
        'tiny/block2/unit_1/bottleneck_v1/conv1',
        'tiny/block2/unit_1/bottleneck_v1/conv2',
        'tiny/block2/unit_1/bottleneck_v1/conv3',
        'tiny/block2/unit_2/bottleneck_v1/conv1',
        'tiny/block2/unit_2/bottleneck_v1/conv2',
        'tiny/block2/unit_2/bottleneck_v1/conv3',
    ]
    self.assertItemsEqual(block1_names + block2_names, end_points)
def testEndPointsV1(self):
    """End points of a small v1 bottleneck network must match the known name set."""
    first_block = resnet_v1.resnet_v1_block(
        'block1', base_depth=1, num_units=2, stride=2)
    second_block = resnet_v1.resnet_v1_block(
        'block2', base_depth=2, num_units=2, stride=1)
    network_blocks = [first_block, second_block]

    network_input = create_test_input(2, 32, 16, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(
            network_input, network_blocks, scope='tiny')

    # Comparison ignores ordering; names are grouped per unit for readability.
    wanted = [
        # block1, unit_1
        'tiny/block1/unit_1/bottleneck_v1/shortcut',
        'tiny/block1/unit_1/bottleneck_v1/conv1',
        'tiny/block1/unit_1/bottleneck_v1/conv2',
        'tiny/block1/unit_1/bottleneck_v1/conv3',
        # block1, unit_2
        'tiny/block1/unit_2/bottleneck_v1/conv1',
        'tiny/block1/unit_2/bottleneck_v1/conv2',
        'tiny/block1/unit_2/bottleneck_v1/conv3',
        # block2, unit_1
        'tiny/block2/unit_1/bottleneck_v1/shortcut',
        'tiny/block2/unit_1/bottleneck_v1/conv1',
        'tiny/block2/unit_1/bottleneck_v1/conv2',
        'tiny/block2/unit_1/bottleneck_v1/conv3',
        # block2, unit_2
        'tiny/block2/unit_2/bottleneck_v1/conv1',
        'tiny/block2/unit_2/bottleneck_v1/conv2',
        'tiny/block2/unit_2/bottleneck_v1/conv3',
    ]
    self.assertItemsEqual(wanted, end_points)
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 include_root_block=True,
                 reuse=None,
                 scope='resnet_v1_50'):
    """Build a ResNet-50 with block strides (1, 2, 2, 2).

    All arguments are forwarded to ``resnet_v1.resnet_v1``; see that function
    for their meaning and for the return value. Compared with the layout used
    by the other ResNet-50 definitions, the stride-1 block is placed first
    (the original note says this accommodates a U-Net model).
    """
    # (scope, base_depth, num_units, stride)
    layout = (
        ('block1', 64, 3, 1),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 2),
    )
    blocks = [
        resnet_v1.resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in layout
    ]
    return resnet_v1.resnet_v1(
        inputs,
        blocks,
        num_classes,
        is_training=is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        include_root_block=include_root_block,
        reuse=reuse,
        scope=scope)
def _decide_blocks(self):
    """Set ``self._blocks`` for the 41-layer (shallower) ResNet variant.

    Units per block: 3 x 64 + 3 x 128 + 4 x 256 + 3 x 512 (the original note
    says this configuration was tested in Experiment 4 of the project report).

    Raises:
        NotImplementedError: if ``self._num_layers`` is anything other than 41.
    """
    if self._num_layers != 41:
        # other numbers are not supported
        raise NotImplementedError

    # (scope, base_depth, num_units, stride); the last two blocks use stride 1.
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 3, 2),
        ('block3', 256, 4, 1),
        ('block4', 512, 3, 1),
    )
    self._blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in layout
    ]
def _decide_blocks(self):
    """Set ``self._blocks`` to a four-block layout with base depths 16/32/64/128.

    Unit counts are 3/4/6/3; the first two blocks use stride 2 and the last
    two use stride 1.
    """
    # (scope, base_depth, num_units, stride)
    layout = (
        ('block1', 16, 3, 2),
        ('block2', 32, 4, 2),
        ('block3', 64, 6, 1),
        ('block4', 128, 3, 1),
    )
    self._blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in layout
    ]
def __init__(self, is_training):
    """Create bookkeeping containers, graph placeholders and settings, then build.

    Args:
        is_training: stored unchanged on ``self.train``.

    The method ends by calling ``self.build_all()``, which is defined
    outside this block.
    """
    # Model name is the lower-cased class name.
    self.name = self.__class__.__name__.lower()

    # Empty containers filled in elsewhere.
    self.vars = {}
    self.layers = []
    self.activations = []
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.losses = {}
    self.score_summaries = {}
    self.event_summaries = {}

    # Graph inputs. NOTE(review): the unnamed placeholders get auto-generated
    # op names, so keep the creation order stable.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    # Box inputs have 5 columns; presumably (batch_index, x1, y1, x2, y2) - TODO confirm.
    self.H_boxes = tf.placeholder(tf.float32, shape=[None , 5], name = 'H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None , 5], name = 'O_boxes')
    self.H_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'H_boxes_enc')
    self.O_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'O_boxes_enc')
    self.HO_boxes_enc= tf.placeholder(tf.float32, shape=[None , 4], name = 'HO_boxes_enc')
    # Label and mask inputs are 26 wide, matching self.num_classes below.
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_HO')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 26], name = 'H_mask')
    self.H_num = tf.placeholder(tf.int32) # pos
    self.ivs = tf.placeholder(tf.int32, shape=[26], name = 'idx_GT_verbs')
    # 300-wide embedding input, matching self.in_dim below.
    self.inputs = tf.placeholder(tf.float32, shape=[None, 300], name = 'embedding')
    # A one-element list holding a single sparse placeholder.
    self.support = [tf.sparse_placeholder(tf.float32) for _ in range(1)]
    self.num_nonzero = tf.placeholder(tf.int32)

    # Sizes and settings.
    self.in_dim = 300
    self.hidden_dim = 512
    self.out_dim = 512
    self.num_classes = 26
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.train = is_training
    self.now_lr = None
    self.optimizer = None
    self.opt_op = None

    # TensorFlow 1.1.0 is given explicit resnet_utils.Block tuples; any other
    # version imports and uses the resnet_v1_block helper. Both branches
    # define five blocks.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck,[(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
    self.build_all()
def build(self, inputs, input_pixel_size, is_training, scope='resnet_v1_101'):
    """Build a ResNet-101 feature extractor whose first blocks are frozen.

    The network is assembled in three steps: the base layers from
    ``self._build_base`` and the first ``self.config.fixed_block`` blocks are
    built with ``is_training=False``; the remaining blocks are built with the
    caller's ``is_training``.

    Args:
        inputs: input tensor passed to ``self._build_base``; the original
            docstring describes it as [batch_size, height, width, channels].
        input_pixel_size: size of the input (H x W). Not used by this method.
        is_training: True for training, False for validation/testing. Only
            affects the blocks after ``fixed_block``.
        scope: variable scope for the ResNet blocks. It is stored on
            ``self._scope`` and must be set correctly by the caller.

    Returns:
        A tuple ``(feature_maps_out, end_points)``: the output of the second
        (non-frozen) ``resnet_v1`` call and that call's end-points dict.
    """
    res_config = self.config
    fixed_block = res_config.fixed_block
    self._scope = scope

    # Base layers are always built in inference mode.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_base = self._build_base(inputs)

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]

    # Frozen part: the first `fixed_block` blocks, no variable reuse.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv1, _ = resnet_v1.resnet_v1(
            net_base,
            blocks[0:fixed_block],
            global_pool=False,
            include_root_block=False,
            scope=self._scope)

    # Remaining blocks follow the caller's training mode.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv2, net_dict2 = resnet_v1.resnet_v1(
            net_conv1,
            blocks[fixed_block:],
            global_pool=False,
            include_root_block=False,
            scope=self._scope)

    feature_maps_out = net_conv2
    return feature_maps_out, net_dict2
def __init__(self):
    """Create bookkeeping containers, graph placeholders, loss weights and settings.

    Takes no arguments. The ResNet block layout stored on ``self.blocks``
    depends on the installed TensorFlow version.
    """
    # Empty containers filled in elsewhere.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}

    # Graph inputs. NOTE(review): the unnamed placeholders get auto-generated
    # op names, so keep the creation order stable.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp') # Pattern.reshape( num_pos_neg, 64, 64, 3)
    self.Hsp_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes') #Object_augmented[:num_pos].reshape(num_pos, 5)
    # Label and mask inputs are 29 wide, matching self.num_classes below.
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
    self.H_num = tf.placeholder(tf.int32)

    # Constant weight tables, reshaped to one row of 29 (or 2) float32 values.
    # NOTE(review): presumably per-class loss weights - confirm where they are used.
    self.HO_weight = np.array([3.3510249, 3.4552405, 4.0257854, 0.0, 4.088436, 3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237, 2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892, 3.9706533, 4.504736, 0.0, 1.4873443, 3.700363, 4.1058283, 3.6298118, 0.0, 6.490651, 5.0808263, 1.520838, 3.3888445, 0.0, 3.9899964], dtype = 'float32').reshape(1,29)
    self.H_weight = np.array([4.0984106, 4.102459, 4.0414762, 4.060745, 4.0414762, 3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364, 2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065, 4.534647, 3.4699364, 2.9466882, 1.8585607, 3.9433942, 3.9433942, 4.3523254, 3.8368235, 6.4963055, 5.138182, 1.7807873, 4.080392, 1.9544303, 4.5761204],dtype = 'float32').reshape(1,29)
    self.binary_weight = np.array([1.0986122886681098, 0.4054651081081644], dtype = 'float32').reshape(1,2)

    # Sizes and settings.
    self.num_classes = 29
    self.num_binary = 2 # existence (0 or 1) of HOI
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)

    # TensorFlow 1.1.0 is given explicit resnet_utils.Block tuples; any other
    # version imports and uses the resnet_v1_block helper. Both branches
    # define five blocks.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck,[(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def _decide_block(self):
    """Fill ``self.blocks`` with the four ResNet blocks for ``self.num_layers``.

    Base depths are 64/128/256/512 with strides 2/2/1/1; only the unit counts
    depend on the depth (50, 101 or 152).

    Raises:
        NotImplementedError: for any other ``self.num_layers``. In that case
            ``self.blocks`` has already been reset to an empty list.
    """
    self.blocks = []
    block_names = ("block1", "block2", "block3", "block4")
    depths = [2 ** exponent for exponent in range(6, 10)]
    block_strides = [2, 2, 1, 1]

    depth = self.num_layers
    if depth == 50:
        units_per_block = [3, 4, 6, 3]
    elif depth == 101:
        units_per_block = [3, 4, 23, 3]
    elif depth == 152:
        units_per_block = [3, 8, 36, 3]
    else:
        # other numbers are not supported
        raise NotImplementedError

    for name, base_depth, units, stride in zip(
            block_names, depths, units_per_block, block_strides):
        self.blocks.append(resnet_v1_block(
            scope=name,
            base_depth=base_depth,
            num_units=units,
            stride=stride))
def resnet_faster_rcnn_head(input, params):
    """Apply a single three-unit ResNet block ('block4') to ``input``.

    Derived from https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow

    Args:
        input: tensor fed into the block.
        params: passed to ``norm_arg_scope`` to configure the slim arg scope.

    Returns:
        The output tensor of the ``resnet_v1`` call (end points are discarded).
    """
    with tf.variable_scope('resnet_head', reuse=tf.AUTO_REUSE):
        head_blocks = [
            resnet_v1_block('block4', base_depth=256, num_units=3, stride=1),
        ]
        with slim.arg_scope(norm_arg_scope(params)):
            head_features, _ = resnet_v1.resnet_v1(
                input,
                head_blocks,
                global_pool=False,
                include_root_block=False,
                scope='resnet_v1_50',
                reuse=tf.AUTO_REUSE)
        return head_features
def restnet_head(input, is_training, scope_name, stage):
    """Build the per-stage head and return a flattened feature tensor.

    For ``stage == 'stage1'`` the input goes through ResNet 'block4' and is
    averaged over axes 1 and 2. For any other stage the input is flattened and
    passed through two 1024-unit fully connected layers with dropout between
    them; layer scopes are suffixed with the stage name.
    """
    if stage != 'stage1':
        flat_input = slim.flatten(input)
        hidden = slim.fully_connected(flat_input, 1024,
                                      scope='fc_1_{}'.format(stage))
        hidden = slim.dropout(hidden, keep_prob=0.5, is_training=is_training,
                              scope='dropout_{}'.format(stage))
        return slim.fully_connected(hidden, 1024,
                                    scope='fc_2_{}'.format(stage))

    head_blocks = [
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        features, _ = resnet_v1.resnet_v1(input,
                                          head_blocks,
                                          global_pool=False,
                                          include_root_block=False,
                                          scope=scope_name)
        # Global average pooling over the two middle axes.
        pooled = tf.reduce_mean(features, axis=[1, 2], keep_dims=False,
                                name='global_average_pooling')
    return pooled
def _decide_blocks(self):
    """Create two identical ResNet-101 block layouts with different scope names.

    ``self._blocks`` uses scopes 'block1'..'block4'; ``self._blocks2`` uses the
    same settings with a '_prev' suffix on every scope.
    """
    # (scope, base_depth, num_units, stride)
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 23, 1),
        ('block4', 512, 3, 1),
    )

    def _make_blocks(suffix):
        return [
            resnet_v1_block(name + suffix, base_depth=depth, num_units=units, stride=stride)
            for name, depth, units, stride in layout
        ]

    self._blocks = _make_blocks('')
    self._blocks2 = _make_blocks('_prev')
def resnet_v1_101_c4(inputs,
                     num_classes=None,
                     is_training=True,
                     global_pool=True,
                     output_stride=None,
                     spatial_squeeze=True,
                     reuse=None,
                     scope='resnet_v1_101'):
    """Build a ResNet-101 truncated after block3 (no block4).

    See ``resnet_v1.resnet_v1`` for argument and return descriptions.
    ``spatial_squeeze`` is accepted but not forwarded by this function.
    """
    # (scope, base_depth, num_units, stride)
    layout = (
        ('block1', 64, 3, 1),
        ('block2', 128, 4, 2),
        ('block3', 256, 23, 2),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in layout
    ]
    return resnet_v1.resnet_v1(
        inputs,
        blocks,
        num_classes,
        is_training,
        global_pool=global_pool,
        output_stride=output_stride,
        include_root_block=True,
        reuse=reuse,
        scope=scope)
def __init__(self):
    """Create bookkeeping containers, graph placeholders and settings.

    Takes no arguments. The ResNet block layout stored on ``self.blocks``
    depends on the installed TensorFlow version.
    """
    # Empty containers filled in elsewhere.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}

    # Graph inputs. NOTE(review): the unnamed placeholders get auto-generated
    # op names, so keep the creation order stable.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    # The 'sp' input is 64 x 64 with 2 channels here.
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name = 'sp')
    self.Hsp_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
    # Label and mask inputs are 29 wide, matching self.num_classes below.
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
    self.H_num = tf.placeholder(tf.int32)

    # Sizes and settings.
    self.num_classes = 29
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)

    # TensorFlow 1.1.0 is given explicit resnet_utils.Block tuples; any other
    # version imports and uses the resnet_v1_block helper. Both branches
    # define five blocks.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck,[(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def restnet_head(input, is_training, scope_name):
    """Run ``input`` through ResNet 'block4' and return the resulting tensor.

    No pooling is applied here; the end-points dict from ``resnet_v1`` is
    discarded.
    """
    head_blocks = [
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        head_features, _ = resnet_v1.resnet_v1(
            input,
            head_blocks,
            global_pool=False,
            include_root_block=False,
            scope=scope_name)
    return head_features
def _decide_blocks(self):
    """Set ``self._blocks`` for ResNet-50 or ResNet-101.

    The two layouts differ only in the number of units in block3 (6 vs 23).

    Raises:
        NotImplementedError: if ``self._num_layers`` is neither 50 nor 101.
    """
    depth = self._num_layers
    if depth == 50:
        block3_units = 6
    elif depth == 101:
        block3_units = 23
    else:
        # other numbers are not supported
        raise NotImplementedError

    self._blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer
        resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
def restnet_head(self, inputs, scope_name, is_training):
    """Run ``inputs`` through ResNet 'block4' and average over axes 1 and 2.

    The arg scope comes from ``self.resnet_arg_scope``. Returns the pooled
    tensor; the end-points dict from ``resnet_v1`` is discarded.
    """
    head_blocks = [
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    with slim.arg_scope(self.resnet_arg_scope(is_training=is_training)):
        features, _ = resnet_v1.resnet_v1(
            inputs,
            head_blocks,
            global_pool=False,
            include_root_block=False,
            scope=scope_name)
        # Global average pooling of the block output.
        pooled = tf.reduce_mean(features, axis=[1, 2], keep_dims=False,
                                name='global_average_pooling')
    return pooled
def resnet_base(img_batch, scope_name, is_training=True):
    """Build a ResNet-50/101 backbone and a P3-P7 feature pyramid on top of it.

    Args:
        img_batch: image tensor fed to the first convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'. It selects the number
            of units in block3 and is used as the variable scope.
        is_training: training flag. Blocks 1-3 additionally respect
            ``cfgs.FIXED_BLOCKS``; block4 uses this flag directly.

    Returns:
        dict mapping 'P3', 'P4', 'P5', 'P6' and 'P7' to pyramid tensors.

    Raises:
        NotImplementedError: for any other ``scope_name``.
    """
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. ')

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem (conv1 + pool1) is always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # not_freezed[i] is False for the first cfgs.FIXED_BLOCKS blocks and True after.
    not_freezed = [False ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]
    # Fixed_Blocks can be 1~3

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # Block4 is never frozen by cfgs.FIXED_BLOCKS; it follows is_training only.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # C2-C4 are read from the second-to-last unit of their block, C5 from the
    # last unit of block4.
    # NOTE(review): presumably the second-to-last unit is used because the
    # block's stride is applied in its last unit - confirm against resnet_v1_block.
    feature_dict = {
        'C2': end_points_C2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3': end_points_C3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4': end_points_C4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5': end_points_C5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        # Pyramid convolutions use L2 weight decay and no activation or normalizer.
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            # P5: 1x1 convolution of C5.
            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256,
                             kernel_size=[1, 1],
                             stride=1,
                             scope='build_P5')
            pyramid_dict['P5'] = P5

            for level in range(4, 2, -1):  # build [P4, P3]
                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)

            # A 3x3 convolution is applied to each of P5, P4 and P3.
            for level in range(5, 2, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256,
                    kernel_size=[3, 3],
                    padding="SAME",
                    stride=1,
                    scope="fuse_P%d" % level)

            # P6 is a stride-2 convolution of P5 (or of C5 when cfgs.USE_P5 is false).
            p6 = slim.conv2d(
                pyramid_dict['P5'] if cfgs.USE_P5 else feature_dict['C5'],
                num_outputs=256,
                kernel_size=[3, 3],
                padding="SAME",
                stride=2,
                scope='p6_conv')
            pyramid_dict['P6'] = p6

            # P7 is ReLU(P6) followed by another stride-2 convolution.
            p7 = tf.nn.relu(p6, name='p6_relu')
            p7 = slim.conv2d(p7,
                             num_outputs=256,
                             kernel_size=[3, 3],
                             padding="SAME",
                             stride=2,
                             scope='p7_conv')
            pyramid_dict['P7'] = p7

    return pyramid_dict
def _decide_blocks(self):
    """Choose the block layout that matches the number of layers.

    Supported depths are 50, 101 and 152; they differ only in the unit counts
    of block2 and block3.

    Raises:
        NotImplementedError: for any other ``self._num_layers``.
    """
    # (num_layers, block2 units, block3 units)
    supported = ((50, 4, 6), (101, 4, 23), (152, 8, 36))
    for depth, block2_units, block3_units in supported:
        if self._num_layers == depth:
            break
    else:
        raise NotImplementedError

    self._blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=block2_units, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
def _decide_blocks(self):
    """Set ``self._blocks`` for ResNet-50, ResNet-101 or ResNet-152.

    Raises:
        NotImplementedError: if ``self._num_layers`` is not 50, 101 or 152.
    """
    num_layers = self._num_layers
    # Only the unit counts of block2 and block3 vary with depth.
    if num_layers == 50:
        middle_units = (4, 6)
    elif num_layers == 101:
        middle_units = (4, 23)
    elif num_layers == 152:
        middle_units = (8, 36)
    else:
        # other numbers are not supported
        raise NotImplementedError

    block2_units, block3_units = middle_units
    first = resnet_v1_block('block1', base_depth=64, num_units=3, stride=2)
    second = resnet_v1_block('block2', base_depth=128, num_units=block2_units, stride=2)
    # use stride 1 for the last conv4 layer
    third = resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1)
    fourth = resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    self._blocks = [first, second, third, fourth]
def build_network(self, sess, is_training=True):
    """Assemble the ResNet-based detection graph: backbone, RPN and RCNN head.

    Args:
        sess: not used inside this method.
        is_training: selects training or test behaviour for the trainable
            layers and for proposal generation.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: for an unsupported ``self._num_layers``, test
            mode or pooling mode.
    """
    # select initializers
    if cfg.FLAGS.initializer == "truncated":
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # NOTE(review): `bottleneck` is only referenced by the legacy
    # resnet_utils.Block layouts that used to be commented out below; it is
    # unused by the live code.
    bottleneck = resnet_v1.bottleneck

    # choose different blocks for different number of layers
    # (block3 uses stride 1, i.e. stride-1 for the last conv4 layer)
    if self._num_layers == 50:
        blocks = [
            resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
            resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
            resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=36, stride=1),
            resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    # Backbone up to block3. The first `fixed_blocks` blocks (and the base
    # layers, unless fixed_blocks is 0) are built with is_training=False.
    assert (0 <= cfg.FLAGS.fixed_blocks < 4)
    if cfg.FLAGS.fixed_blocks == 3:
        # Blocks 1-3 are all frozen.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(
                net,
                blocks[0:cfg.FLAGS.fixed_blocks],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
    elif cfg.FLAGS.fixed_blocks > 0:
        # Frozen prefix, then the remaining blocks up to block3.
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.FLAGS.fixed_blocks],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(
                net,
                blocks[cfg.FLAGS.fixed_blocks:-1],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3],
                          trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID',
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(
            rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                   "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2,
                                           "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID',
                                    activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(
                    rois, roi_scores, "rpn_rois")
        else:
            if cfg.FLAGS.test_mode == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.FLAGS.test_mode == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

        # rcnn
        if cfg.FLAGS.pooling_mode == 'crop':
            pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        else:
            raise NotImplementedError

    # block4 is applied to the pooled region features.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(
            fc7, self._num_classes * 4,
            weights_initializer=initializer_bbox,
            trainable=is_training,
            activation_fn=None,
            scope='bbox_pred')

    # Record every prediction tensor and add them all to the score summaries.
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def resnet_base(img_batch, scope_name, is_training=True):
    """Build the ResNet-v1 backbone through block3 and return (C2, C4).

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building the network one
    block at a time makes it convenient to freeze individual blocks.

    Args:
        img_batch: input image batch fed to the first convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'; selects the number of
            units in block3 and is used as the variable scope.
        is_training: training flag for the blocks that are not frozen by
            cfgs.FIXED_BLOCKS.

    Returns:
        Tuple (C2, C4): the outputs of block1 and block3.

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    """
    block3_units = {'resnet_v1_50': 6, 'resnet_v1_101': 23}
    if scope_name not in block3_units:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr')
    middle_num_units = block3_units[scope_name]

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer.
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1),
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem (conv1 + pool1) is always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can
            # behave inconsistently for images of different sizes:
            # sometimes 0, sometimes 1
            stem = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
            stem = slim.max_pool2d(stem, [3, 3], stride=2, padding='VALID', scope='pool1')

    # Fixed_Blocks can be 1~3
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]

    # Build block1..block3 in order; each stage gets its own arg scope so a
    # frozen stage is built with is_training=False.
    stage_outputs = []
    features = stem
    for stage in range(3):
        with slim.arg_scope(resnet_arg_scope(is_training=(is_training and not_freezed[stage]))):
            features, _ = resnet_v1.resnet_v1(features,
                                              blocks[stage:stage + 1],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=scope_name)
        stage_outputs.append(features)

    C2, _, C4 = stage_outputs
    return C2, C4
def _build_network(self, sess, is_training=True):
    """Assemble the ResNet backbone, RPN, RoI pooling and classification head.

    Args:
        sess: not referenced inside this method.
        is_training: training flag for the non-fixed blocks, the region
            proposal stage and the classification head.

    Returns:
        Tuple (rois, cls_prob, bbox_pred).

    Raises:
        NotImplementedError: if self._num_layers is not 50, 101 or 152, or if
            cfg.POOLING_MODE is not 'crop'.

    Side effects:
        Appends the head feature map to self._act_summaries, stores it in
        self._layers['head'], and merges self._predictions into
        self._score_summaries.
    """
    # select initializers: same stddevs, only the distribution differs
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different number of layers:
    # depth -> (units in block2, units in block3)
    units_by_depth = {50: (4, 6), 101: (4, 23), 152: (8, 36)}
    if self._num_layers not in units_by_depth:
        # other numbers are not supported
        raise NotImplementedError
    block2_units, block3_units = units_by_depth[self._num_layers]
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=block2_units, stride=2),
        # use stride 1 for the last conv4 layer
        resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]

    fixed_blocks = cfg.RESNET.FIXED_BLOCKS
    assert (0 <= fixed_blocks <= 3)

    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()

    # Leading `fixed_blocks` blocks are built frozen ...
    if fixed_blocks > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              blocks[0:fixed_blocks],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=self._resnet_scope)
    # ... and the remaining blocks up to (not including) block4 follow
    # is_training.
    if fixed_blocks < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              blocks[fixed_blocks:-1],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=self._resnet_scope)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    # block4 is applied to the pooled regions
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        # region classification
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def resnet_base(img_batch, scope_name, is_training):
    """Build a four-block ResNet-v1 and return its per-image class outputs.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building the network one
    block at a time makes it convenient to freeze blocks.

    Args:
        img_batch: input image batch fed to the first convolution.
        scope_name: 'resnet_v1_50', 'resnet_v1_101' or 'resnet_v1_152';
            selects the number of units in block3 and is used as the
            variable scope.
        is_training: training flag passed to every resnet_arg_scope.

    Returns:
        The block4 output, built with global_pool=True and
        num_classes=cfgs.num_classes, reshaped to [-1, cfgs.num_classes].

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    """
    # Number of units in block3 per network (e.g. 23 for ResNet-101).
    block3_units = {'resnet_v1_50': 6, 'resnet_v1_101': 23, 'resnet_v1_152': 36}
    if scope_name not in block3_units:
        raise NotImplementedError(
            'We only support resnet_v1_50 、resnet_v1_101 、resnet152. Check your network name....yjr')
    middle_num_units = block3_units[scope_name]

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=2),
    ]

    # resnet_arg_scope supplies the layer configuration for the stem.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can
            # behave inconsistently for images of different sizes:
            # sometimes 0, sometimes 1
            # First ResNet convolution: 7x7, 64 filters, stride 2.
            stem = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            # Explicit zero padding of one pixel on each spatial side before
            # the VALID max-pool.
            stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
            # 3x3 max pooling.
            stem = slim.max_pool2d(stem, [3, 3], stride=2, padding='VALID', scope='pool1')

    # block1..block3 (C2..C4): each call receives a one-element slice of
    # `blocks`, i.e. one whole ResNet block. The end_points dictionaries
    # returned alongside each stage output are not used here.
    features = stem
    for stage in range(3):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            features, _ = resnet_v1.resnet_v1(features,
                                              blocks[stage:stage + 1],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=scope_name)

    # block4 (C5) with global pooling and the classification layer.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        logits, _ = resnet_v1.resnet_v1(features,
                                        blocks[3:4],
                                        num_classes=cfgs.num_classes,
                                        global_pool=True,
                                        include_root_block=False,
                                        scope=scope_name)
    return tf.reshape(logits, [-1, cfgs.num_classes])
def __init__(self,
             in_channels=3,
             num_classes=0,
             num_nodes=17 + 2,
             edge_importance_weighting=True,
             is_training=True,
             num_fc=1024,
             posetype=1,
             bi_posegraph=False,
             bodypart=False,
             binary=False,
             posemap=False,
             posegraph=False,
             semantic=False,
             data_bn=True):
    """Create the input placeholders and architecture settings.

    Args:
        in_channels: channel count of the graph-node input; stored as self.c.
        num_classes: not read in this method (self.num_classes is set to 600).
        num_nodes: number of graph nodes; sizes the 'Gnodes' placeholder.
        edge_importance_weighting: not read in this method.
        is_training: selects the keep-probabilities (see below).
        num_fc: stored as self.num_fc2 (self.num_fc is fixed at 1024).
        posetype: 1 gives a 2-channel 'sp' placeholder, anything else 3.
        bi_posegraph, bodypart, binary, posemap, posegraph: architecture
            switches stored on the instance under the same names.
        semantic: architecture switch stored as self.semantic_flag
            (self.semantic is the 'semantic_feat' placeholder).
        data_bn: stored as self.data_bn.
    """
    self.predictions = {}
    self.train_summaries = []
    self.losses = {}
    self.lr = tf.placeholder(tf.float32)
    self.num_binary = 1  # existence of HOI (0 or 1)
    self.num_classes = 600
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 1], name='gt_binary_label')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name='gt_class_HO')
    self.is_training = is_training
    # Keep-probabilities: configured value and 0.5 when training, 1 otherwise.
    if self.is_training:
        self.keep_prob = cfg.TRAIN_DROP_OUT_BINARY
        self.keep_prob_tail = .5
    else:
        self.keep_prob = 1
        self.keep_prob_tail = 1
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.head = tf.placeholder(tf.float32, shape=[1, None, None, 1024], name='head')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    self.partboxes = tf.placeholder(tf.float32, shape=[None, 17, 5], name='part_boxes')
    self.semantic = tf.placeholder(tf.float32, shape=[None, 1024], name='semantic_feat')
    self.H_num = tf.placeholder(tf.int32)
    # Control the network architecture
    self.bodypart = bodypart
    self.binary = binary
    self.posemap = posemap
    self.posegraph = posegraph
    self.bi_posegraph = bi_posegraph
    self.posetype = posetype
    self.semantic_flag = semantic
    # The spatial-map placeholder has 2 channels for posetype 1, else 3.
    if self.posetype == 1:
        self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name='sp')
    else:
        self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
    # ResNet 50 Network
    self.scope = 'resnet_v1_50'
    self.num_fc = 1024
    self.num_fc2 = num_fc
    self.stride = [16, ]
    # TensorFlow 1.1.0 is handled separately: blocks are spelled out as
    # explicit resnet_utils.Block tuples instead of resnet_v1_block calls.
    if tf.__version__ == '1.1.0':
        self.blocks = [
            resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
            resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
            resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)
        ]
    # GCN setting
    self.num_nodes = num_nodes
    self.c = in_channels
    self.data_bn = data_bn
    self.strategy = 'spatial'
    self.graph = Graph(strategy=self.strategy)
    self.A = tf.convert_to_tensor(self.graph.A.astype(np.float32))  # [None, num_nodes, num_nodes]
    # The leading dimension of A is used as the spatial kernel size.
    self.spatial_kernel_size = self.A.shape[0]
    # [N, C, T, V, M] = [N, 3, 1, 19, 1]
    self.Gnodes = tf.placeholder(tf.float32, shape=[None, self.c, 1, self.num_nodes, 1], name='Gnodes')
    # ST_GCN
    self.depth_st_gcn_networks = 10
def __init__(self, model_name):
    """Create placeholders, ResNet-101 block layout and label constants.

    Args:
        model_name: name of the model variant. When it contains
            'unique_weights', '_pa3' or '_pa4', an extra 'block6' is
            appended to self.blocks.

    Raises:
        Exception: if the installed TensorFlow version is exactly '1.1.0'.

    Side effects:
        Loads '<cfg.ROOT_DIR>/Data/num_inst.npy' with np.load and prints a
        message when block6 is added.
    """
    self.model_name = model_name
    self.visualize = {}
    self.test_visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # NOTE: placeholder creation order is kept as-is; the unnamed
    # placeholders receive auto-generated graph names in creation order.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name='gt_class_HO')
    self.H_num = tf.placeholder(tf.int32)  # positive nums
    self.image_id = tf.placeholder(tf.int32)
    self.num_classes = 600
    self.compose_num_classes = 600
    self.num_fc = 1024
    self.verb_num_classes = 117
    self.obj_num_classes = 80
    self.scope = 'resnet_v1_101'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)

    if tf.__version__ == '1.1.0':
        raise Exception('wrong tensorflow version 1.1.0')

    from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
    self.blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=23, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
        resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)
    ]
    # Some model variants get an additional block6 with the same layout as
    # block4/block5. Membership is tested with `in` rather than by calling
    # the __contains__ dunder directly.
    if any(tag in self.model_name for tag in ('unique_weights', '_pa3', '_pa4')):
        print("add block6 unique_weights2")
        self.blocks.append(
            resnet_v1_block('block6', base_depth=512, num_units=3, stride=1))

    # We copy from TIN. Calculated by log(1/(n_c/sum(n_c))), where c is the
    # category and n_c is the number of positive samples.
    # 600 values, one per HOI class, reshaped to (1, 600).
    self.HO_weight = np.array([
        9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923, 8.714437, 8.59822,
        12.977117, 6.2745423, 11.227917, 6.765012, 9.436157, 9.56762, 11.0675745, 11.530198,
        9.609821, 9.897503, 6.664475, 6.811699, 6.644726, 9.170454, 13.670264, 3.903943,
        10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912, 5.5245695, 9.7286825,
        8.997436, 10.699849, 9.601237, 11.965516, 9.192927, 10.220277, 6.056692, 7.734048,
        8.42324, 6.586457, 6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238,
        8.071842, 10.431585, 12.417501, 11.530198, 11.227917, 4.0678477, 8.854023, 12.571651,
        8.225684, 10.996116, 11.0675745, 10.100731, 7.0376034, 7.463688, 12.571651, 14.363411,
        5.4902234, 11.0675745, 14.363411, 8.45805, 10.269067, 9.820116, 14.363411, 11.272368,
        11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117, 9.300817, 10.032678,
        12.571651, 10.114916, 10.471591, 13.264799, 14.363411, 8.01953, 10.412168, 9.644913,
        9.981384, 7.2197933, 14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472,
        12.060826, 8.862153, 9.799063, 12.753973, 12.753973, 10.412168, 10.8976755, 10.471591,
        12.571651, 9.519224, 6.207762, 12.753973, 6.60636, 6.2896967, 4.5198326, 9.7887,
        13.670264, 11.878505, 11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679,
        9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381, 11.144535, 13.264799,
        5.888291, 11.227917, 10.779892, 7.643191, 11.105314, 9.414651, 11.965516, 14.363411,
        12.28397, 9.909063, 8.94731, 7.0330057, 8.129001, 7.2817025, 9.874775, 9.758241,
        11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799, 9.699972, 11.878505,
        8.260853, 7.1437693, 6.9321113, 6.990665, 8.8104515, 11.655361, 13.264799, 4.515912,
        9.897503, 11.418972, 8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776,
        12.417501, 0.6271591, 12.060826, 12.060826, 12.166186, 5.2946343, 11.318889, 9.8308115,
        8.016022, 9.198626, 10.8976755, 13.670264, 11.105314, 14.363411, 9.653881, 9.503599,
        12.753973, 5.80546, 9.653881, 9.592727, 12.977117, 13.670264, 7.995224, 8.639826,
        12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597, 12.417501, 11.47304,
        10.451388, 8.95624, 10.996116, 11.144535, 11.031207, 13.670264, 13.670264, 6.397866,
        7.513285, 9.981384, 11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451,
        12.977117, 9.609821, 8.601359, 9.055143, 11.965516, 11.105314, 13.264799, 5.8201604,
        10.451388, 9.944571, 7.7855496, 14.363411, 8.5463, 13.670264, 7.9288645, 5.7561946,
        9.075144, 9.0701065, 5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264,
        8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537, 4.7828183, 9.519224,
        7.6492405, 8.031909, 7.8180614, 4.451856, 10.045923, 10.83705, 13.264799, 13.670264,
        4.5245686, 14.363411, 10.556748, 10.556748, 14.363411, 13.670264, 14.363411, 8.037262,
        8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135, 11.965516, 10.032678,
        5.9017305, 9.738439, 12.977117, 11.105314, 10.725825, 9.080208, 11.272368, 14.363411,
        14.363411, 13.264799, 6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826,
        9.488214, 5.4571533, 10.129305, 10.579222, 12.571651, 11.965516, 6.237189, 9.428937,
        9.618479, 8.620408, 11.590822, 11.655361, 9.968962, 10.8080635, 10.431585, 14.363411,
        3.796231, 12.060826, 10.302968, 9.551227, 8.75394, 10.579222, 9.944571, 14.363411,
        6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264, 11.724354, 9.993963,
        8.230013, 9.100721, 10.374427, 7.865129, 6.514087, 14.363411, 11.031207, 11.655361,
        12.166186, 7.419324, 9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144,
        9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 5.9870906, 9.269661, 11.878505,
        11.227917, 13.670264, 8.339964, 7.6763024, 10.471591, 10.451388, 13.670264, 11.185357,
        10.032678, 9.313555, 12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451,
        8.965248, 10.451388, 7.0609145, 10.579222, 13.264799, 10.49221, 8.978916, 7.124196,
        10.602211, 8.9743395, 7.77862, 8.073695, 9.644913, 9.339531, 8.272101, 4.794418,
        9.016304, 8.012526, 10.674532, 14.363411, 7.995224, 12.753973, 5.5157638, 8.934066,
        10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411, 12.753973, 12.417501,
        8.59197, 10.513264, 10.338059, 14.363411, 7.7079706, 14.363411, 13.264799, 13.264799,
        10.752493, 14.363411, 14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117,
        11.798462, 9.968962, 12.753973, 11.47304, 11.227917, 7.6763024, 10.779892, 11.185357,
        14.363411, 7.369478, 14.363411, 9.944571, 10.779892, 10.471591, 9.54313, 9.148476,
        10.285873, 10.412168, 12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 12.977117,
        11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625, 10.9622135, 14.363411,
        13.264799, 9.575919, 9.080208, 11.878505, 7.1863923, 9.366199, 8.854023, 9.874775,
        8.2857685, 13.670264, 11.878505, 12.166186, 7.616999, 9.44343, 8.288065, 8.8104515,
        8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 7.058223, 5.0138307, 12.753973,
        10.173757, 9.863602, 11.318889, 9.54313, 10.996116, 12.753973, 7.8339925, 7.569945,
        7.4427395, 5.560738, 12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053,
        7.8849015, 7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411, 10.8976755, 5.908519,
        10.269067, 9.176025, 9.852551, 9.488214, 8.90809, 8.537411, 9.653881, 8.662968,
        11.965516, 10.143904, 14.363411, 14.363411, 9.407584, 5.281472, 11.272368, 12.060826,
        14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411, 12.060826, 11.965516,
        10.9622135, 10.9622135, 14.363411, 5.658909, 8.934066, 12.571651, 8.614018, 11.655361,
        13.264799, 10.996116, 13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673,
        10.513264, 8.7430105, 10.338059, 13.264799, 6.878481, 9.065094, 8.87035, 14.363411,
        9.92076, 6.5872955, 10.32036, 14.363411, 9.944571, 11.798462, 10.9622135, 11.031207,
        7.652888, 4.334878, 13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 14.363411,
        13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432, 8.916674, 8.59509,
        14.363411, 7.376845, 11.798462, 11.530198, 11.318889, 11.185357, 5.0664344, 11.185357,
        9.372978, 10.471591, 9.6629305, 11.367679, 8.73579, 9.080208, 11.724354, 5.04781,
        7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 12.166186, 7.215852, 4.374113,
        11.655361, 11.530198, 14.363411, 6.4993753, 11.031207, 8.344818, 10.513264, 10.032678,
        14.363411, 14.363411, 4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
    ], dtype='float32').reshape(1, 600)

    num_inst_path = cfg.ROOT_DIR + '/Data/num_inst.npy'
    num_inst = np.load(num_inst_path)
    self.num_inst = num_inst

    # Conversion matrices between verb/object classes and HOI classes.
    verb_to_HO_matrix, obj_to_HO_matrix = get_convert_matrix(
        self.verb_num_classes, self.obj_num_classes)
    self.obj_to_HO_matrix = tf.constant(obj_to_HO_matrix, tf.float32)
    self.verb_to_HO_matrix = tf.constant(verb_to_HO_matrix, tf.float32)
    # Object / verb ground truth derived from the HOI ground truth: an entry
    # is 1.0 where the product with the transposed conversion matrix is > 0.
    self.gt_obj_class = tf.cast(
        tf.matmul(self.gt_class_HO, self.obj_to_HO_matrix, transpose_b=True) > 0,
        tf.float32)
    self.gt_verb_class = tf.cast(
        tf.matmul(self.gt_class_HO, self.verb_to_HO_matrix, transpose_b=True) > 0,
        tf.float32)
def resnet_base(img_batch, scope_name, is_training=False):
    """Build the ResNet-v1 stem plus block1 and block2 and return pooled C3.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building the network one
    block at a time makes it convenient to freeze blocks.

    Args:
        img_batch: input image batch fed to the first convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'; validated and used as
            the variable scope.
        is_training: training flag for block1 and block2 (no block is frozen
            in this variant).

    Returns:
        C3: the block2 output after slim.avg_pool2d(C3, 2).

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    """
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr')

    # NOTE(review): block3 uses a hard-coded 9 units rather than the
    # per-network count, and block3/block4 are declared but never built in
    # this function - the network stops after block2.
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=9, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem (conv1 + pool1) is always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can
            # behave inconsistently for images of different sizes:
            # sometimes 0, sometimes 1
            stem = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
            stem = slim.max_pool2d(stem, [3, 3], stride=2, padding='VALID', scope='pool1')

    # No block is frozen here: every entry is True.
    not_freezed = [True] * 4

    block1_training = is_training and not_freezed[0]
    with slim.arg_scope(resnet_arg_scope(is_training=block1_training)):
        C2, _ = resnet_v1.resnet_v1(stem,
                                    blocks[0:1],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    block2_training = is_training and not_freezed[1]
    with slim.arg_scope(resnet_arg_scope(is_training=block2_training)):
        C3, _ = resnet_v1.resnet_v1(C2,
                                    blocks[1:2],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

    # Average-pool the block2 output with a kernel size of 2.
    C3 = slim.avg_pool2d(C3, 2)
    return C3
def resnet_base(img_batch, scope_name, is_training=True):
    """Build a ResNet-v1 backbone with a feature pyramid on top.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building the network one
    block at a time makes it convenient to freeze blocks.

    Args:
        img_batch: input image batch fed to the first convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'; selects the number of
            units in block3 and is used as the variable scope.
        is_training: training flag for block4 and for the blocks that are not
            frozen by cfgs.FIXED_BLOCKS.

    Returns:
        List of pyramid feature maps, one per level name in cfgs.LEVLES, in
        that order.

    Raises:
        NotImplementedError: if scope_name is not a supported network name.

    Side effects:
        Prints cfgs.LEVLES and cfgs.BASE_ANCHOR_SIZE_LIST.
    """
    block3_units = {'resnet_v1_50': 6, 'resnet_v1_101': 23}
    if scope_name not in block3_units:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....')
    middle_num_units = block3_units[scope_name]

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem (conv1 + pool1) is always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can
            # behave inconsistently for images of different sizes:
            # sometimes 0, sometimes 1
            stem = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
            stem = slim.max_pool2d(stem, [3, 3], stride=2, padding='VALID', scope='pool1')

    # Fixed_Blocks can be 1~3
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]

    # block1..block3: each stage may be frozen; keep every stage's end points.
    stage_end_points = []
    features = stem
    for stage in range(3):
        with slim.arg_scope(
                resnet_arg_scope(is_training=(is_training and not_freezed[stage]))):
            features, end_points = resnet_v1.resnet_v1(features,
                                                       blocks[stage:stage + 1],
                                                       global_pool=False,
                                                       include_root_block=False,
                                                       scope=scope_name)
        stage_end_points.append(end_points)
    end_points_C2, end_points_C3, end_points_C4 = stage_end_points

    # block4 always follows is_training.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        _, end_points_C5 = resnet_v1.resnet_v1(features,
                                               blocks[3:4],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=scope_name)

    # Backbone features are read from specific units of each block: the
    # second-to-last unit of block1..block3 and the last unit of block4.
    c2_name = '{}/block1/unit_2/bottleneck_v1'.format(scope_name)
    c3_name = '{}/block2/unit_3/bottleneck_v1'.format(scope_name)
    c4_name = '{}/block3/unit_{}/bottleneck_v1'.format(scope_name, middle_num_units - 1)
    c5_name = '{}/block4/unit_3/bottleneck_v1'.format(scope_name)
    feature_dict = {
        'C2': end_points_C2[c2_name],
        'C3': end_points_C3[c3_name],
        'C4': end_points_C4[c4_name],
        'C5': end_points_C5[c5_name],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            # P5 is a 1x1 convolution of C5.
            pyramid_dict['P5'] = slim.conv2d(feature_dict['C5'],
                                             num_outputs=256,
                                             kernel_size=[1, 1],
                                             stride=1,
                                             scope='build_P5')

            # build [P4, P3, P2]: each level combines its backbone feature
            # with the pyramid level above it via fusion_two_layer.
            for level in (4, 3, 2):
                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)

            # A 3x3 convolution is applied to every level, P5 down to P2.
            for level in (5, 4, 3, 2):
                level_name = 'P%d' % level
                pyramid_dict[level_name] = slim.conv2d(pyramid_dict[level_name],
                                                       num_outputs=256,
                                                       kernel_size=[3, 3],
                                                       padding="SAME",
                                                       stride=1,
                                                       scope="fuse_P%d" % level)

            # Optional P6: stride-2 average pool (1x1 kernel) of the fused P5.
            if "P6" in cfgs.LEVLES:
                pyramid_dict['P6'] = slim.avg_pool2d(pyramid_dict['P5'],
                                                     kernel_size=[1, 1],
                                                     stride=2,
                                                     scope='build_P6')

    # return [P2, P3, P4, P5, P6]
    print("we are in Pyramid::-======>>>>")
    print(cfgs.LEVLES)
    print("base_anchor_size are: ", cfgs.BASE_ANCHOR_SIZE_LIST)
    print(20 * "__")
    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]
def __init__(self):
    """Create the input placeholders, loss weights and ResNet-50 block layout.

    Reads tf.__version__ to decide how the residual blocks are described;
    everything else is a fixed constant or a freshly created placeholder.
    """
    # Bookkeeping containers filled in elsewhere.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}
    # Input placeholders. Creation order is significant for the unnamed
    # placeholders, which receive auto-generated graph names.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name = 'gt_class_HO')
    self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
    self.H_num = tf.placeholder(tf.int32)
    # 600 float32 values reshaped to (1, 600).
    # NOTE(review): presumably per-HOI-class loss weights - confirm at the
    # point of use.
    self.HO_weight = np.array([
        9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923, 8.714437, 8.59822,
        12.977117, 6.2745423, 11.227917, 6.765012, 9.436157, 9.56762, 11.0675745, 11.530198,
        9.609821, 9.897503, 6.664475, 6.811699, 6.644726, 9.170454, 13.670264, 3.903943,
        10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912, 5.5245695, 9.7286825,
        8.997436, 10.699849, 9.601237, 11.965516, 9.192927, 10.220277, 6.056692, 7.734048,
        8.42324, 6.586457, 6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238,
        8.071842, 10.431585, 12.417501, 11.530198, 11.227917, 4.0678477, 8.854023, 12.571651,
        8.225684, 10.996116, 11.0675745, 10.100731, 7.0376034, 7.463688, 12.571651, 14.363411,
        5.4902234, 11.0675745, 14.363411, 8.45805, 10.269067, 9.820116, 14.363411, 11.272368,
        11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117, 9.300817, 10.032678,
        12.571651, 10.114916, 10.471591, 13.264799, 14.363411, 8.01953, 10.412168, 9.644913,
        9.981384, 7.2197933, 14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472,
        12.060826, 8.862153, 9.799063, 12.753973, 12.753973, 10.412168, 10.8976755, 10.471591,
        12.571651, 9.519224, 6.207762, 12.753973, 6.60636, 6.2896967, 4.5198326, 9.7887,
        13.670264, 11.878505, 11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679,
        9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381, 11.144535, 13.264799,
        5.888291, 11.227917, 10.779892, 7.643191, 11.105314, 9.414651, 11.965516, 14.363411,
        12.28397, 9.909063, 8.94731, 7.0330057, 8.129001, 7.2817025, 9.874775, 9.758241,
        11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799, 9.699972, 11.878505,
        8.260853, 7.1437693, 6.9321113, 6.990665, 8.8104515, 11.655361, 13.264799, 4.515912,
        9.897503, 11.418972, 8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776,
        12.417501, 0.6271591, 12.060826, 12.060826, 12.166186, 5.2946343, 11.318889, 9.8308115,
        8.016022, 9.198626, 10.8976755, 13.670264, 11.105314, 14.363411, 9.653881, 9.503599,
        12.753973, 5.80546, 9.653881, 9.592727, 12.977117, 13.670264, 7.995224, 8.639826,
        12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597, 12.417501, 11.47304,
        10.451388, 8.95624, 10.996116, 11.144535, 11.031207, 13.670264, 13.670264, 6.397866,
        7.513285, 9.981384, 11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451,
        12.977117, 9.609821, 8.601359, 9.055143, 11.965516, 11.105314, 13.264799, 5.8201604,
        10.451388, 9.944571, 7.7855496, 14.363411, 8.5463, 13.670264, 7.9288645, 5.7561946,
        9.075144, 9.0701065, 5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264,
        8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537, 4.7828183, 9.519224,
        7.6492405, 8.031909, 7.8180614, 4.451856, 10.045923, 10.83705, 13.264799, 13.670264,
        4.5245686, 14.363411, 10.556748, 10.556748, 14.363411, 13.670264, 14.363411, 8.037262,
        8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135, 11.965516, 10.032678,
        5.9017305, 9.738439, 12.977117, 11.105314, 10.725825, 9.080208, 11.272368, 14.363411,
        14.363411, 13.264799, 6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826,
        9.488214, 5.4571533, 10.129305, 10.579222, 12.571651, 11.965516, 6.237189, 9.428937,
        9.618479, 8.620408, 11.590822, 11.655361, 9.968962, 10.8080635, 10.431585, 14.363411,
        3.796231, 12.060826, 10.302968, 9.551227, 8.75394, 10.579222, 9.944571, 14.363411,
        6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264, 11.724354, 9.993963,
        8.230013, 9.100721, 10.374427, 7.865129, 6.514087, 14.363411, 11.031207, 11.655361,
        12.166186, 7.419324, 9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144,
        9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 5.9870906, 9.269661, 11.878505,
        11.227917, 13.670264, 8.339964, 7.6763024, 10.471591, 10.451388, 13.670264, 11.185357,
        10.032678, 9.313555, 12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451,
        8.965248, 10.451388, 7.0609145, 10.579222, 13.264799, 10.49221, 8.978916, 7.124196,
        10.602211, 8.9743395, 7.77862, 8.073695, 9.644913, 9.339531, 8.272101, 4.794418,
        9.016304, 8.012526, 10.674532, 14.363411, 7.995224, 12.753973, 5.5157638, 8.934066,
        10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411, 12.753973, 12.417501,
        8.59197, 10.513264, 10.338059, 14.363411, 7.7079706, 14.363411, 13.264799, 13.264799,
        10.752493, 14.363411, 14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117,
        11.798462, 9.968962, 12.753973, 11.47304, 11.227917, 7.6763024, 10.779892, 11.185357,
        14.363411, 7.369478, 14.363411, 9.944571, 10.779892, 10.471591, 9.54313, 9.148476,
        10.285873, 10.412168, 12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 12.977117,
        11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625, 10.9622135, 14.363411,
        13.264799, 9.575919, 9.080208, 11.878505, 7.1863923, 9.366199, 8.854023, 9.874775,
        8.2857685, 13.670264, 11.878505, 12.166186, 7.616999, 9.44343, 8.288065, 8.8104515,
        8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 7.058223, 5.0138307, 12.753973,
        10.173757, 9.863602, 11.318889, 9.54313, 10.996116, 12.753973, 7.8339925, 7.569945,
        7.4427395, 5.560738, 12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053,
        7.8849015, 7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411, 10.8976755, 5.908519,
        10.269067, 9.176025, 9.852551, 9.488214, 8.90809, 8.537411, 9.653881, 8.662968,
        11.965516, 10.143904, 14.363411, 14.363411, 9.407584, 5.281472, 11.272368, 12.060826,
        14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411, 12.060826, 11.965516,
        10.9622135, 10.9622135, 14.363411, 5.658909, 8.934066, 12.571651, 8.614018, 11.655361,
        13.264799, 10.996116, 13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673,
        10.513264, 8.7430105, 10.338059, 13.264799, 6.878481, 9.065094, 8.87035, 14.363411,
        9.92076, 6.5872955, 10.32036, 14.363411, 9.944571, 11.798462, 10.9622135, 11.031207,
        7.652888, 4.334878, 13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 14.363411,
        13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432, 8.916674, 8.59509,
        14.363411, 7.376845, 11.798462, 11.530198, 11.318889, 11.185357, 5.0664344, 11.185357,
        9.372978, 10.471591, 9.6629305, 11.367679, 8.73579, 9.080208, 11.724354, 5.04781,
        7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 12.166186, 7.215852, 4.374113,
        11.655361, 11.530198, 14.363411, 6.4993753, 11.031207, 8.344818, 10.513264, 10.032678,
        14.363411, 14.363411, 4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
    ], dtype = 'float32').reshape(1,600)
    # Two float32 values reshaped to (1, 2), matching the 2-wide
    # gt_binary_label placeholder.
    # NOTE(review): presumably weights for the binary HOI-existence loss -
    # confirm at the point of use.
    self.binary_weight = np.array([1.6094379124341003, 0.22314355131420976], dtype = 'float32').reshape(1,2)
    self.num_classes = 600 # HOI
    self.num_binary = 2 # existence (0 or 1) of HOI
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]
    self.lr = tf.placeholder(tf.float32)
    # TensorFlow 1.1.0 is handled separately: blocks are spelled out as
    # explicit resnet_utils.Block tuples instead of resnet_v1_block calls.
    if tf.__version__ == '1.1.0':
        self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256, 64, 1)] * 2 + [(256, 64, 2)]),
                       resnet_utils.Block('block2', resnet_v1.bottleneck,[(512, 128, 1)] * 3 + [(512, 128, 2)]),
                       resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                       resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                       resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
    else: # we use tf 1.2.0 here, Resnet-50
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), # a resnet_v1 bottleneck block
                       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                       resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), # feature former
                       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                       resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
def __init__(self, model_name):
    """Create the input placeholders, class counts, backbone blocks and loss weights.

    Args:
        model_name: Name of the model variant. The substrings ``'_t4_'``,
            ``'_t5_'`` and ``'unique_weights'`` are searched for in it to
            choose the number of classes and whether a sixth backbone block
            is appended.
    """
    self.model_name = model_name

    # Bookkeeping containers filled in by other methods of the class.
    self.visualize = {}
    self.test_visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}

    # Graph inputs.
    self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
    self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
    self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
    self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
    self.gt_class_H = tf.placeholder(tf.float32, shape=[None, 24], name='gt_class_H')
    self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 24], name='gt_class_HO')
    self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 24], name='gt_class_sp')
    self.Mask_HO = tf.placeholder(tf.float32, shape=[None, 24], name='HO_mask')
    self.Mask_H = tf.placeholder(tf.float32, shape=[None, 24], name='H_mask')
    self.Mask_sp = tf.placeholder(tf.float32, shape=[None, 24], name='sp_mask')
    self.gt_compose = tf.placeholder(tf.float32, shape=[None, 222], name='gt_compose')
    self.gt_obj = tf.placeholder(tf.float32, shape=[None, 80], name='gt_obj')
    self.H_num = tf.placeholder(tf.int32)
    self.image_id = tf.placeholder(tf.int32)

    # Both the '_t4_' and '_t5_' variants use 222 classes; everything else 24.
    self.num_classes = 24
    if '_t4_' in self.model_name or '_t5_' in self.model_name:
        self.num_classes = 222
    self.num_fc = 1024
    # NOTE(review): the previous code set verb_num_classes = 21 for '_t5_'
    # models and then unconditionally overwrote it with 24 a few statements
    # later, so the effective value has always been 24. That behavior is kept;
    # confirm whether 21 was actually intended for '_t5_'.
    self.verb_num_classes = 24
    self.obj_num_classes = 80
    self.scope = 'resnet_v1_50'
    self.stride = [16, ]

    wants_block6 = 'unique_weights' in self.model_name
    if tf.__version__ == '1.1.0':
        self.blocks = [
            resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
            resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
        ]
        # resnet_v1_block is only imported on the other branch, so the extra
        # block has to be spelled with the TF 1.1.0 Block API here (same layout
        # as block5); referencing resnet_v1_block would raise UnboundLocalError.
        block6 = None
        if wants_block6:
            block6 = resnet_utils.Block('block6', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)
    else:
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        self.blocks = [
            resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
            resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
            resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
            resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
            resnet_v1_block('block5', base_depth=512, num_units=3, stride=1),
        ]
        block6 = None
        if wants_block6:
            block6 = resnet_v1_block('block6', base_depth=512, num_units=3, stride=1)

    if block6 is not None:
        print("unique_weights2")
        self.blocks.append(block6)

    # Per-class weights, one row of 24 float32 values each.
    # remove 3, 17 22, 23 27
    self.HO_weight = np.array([
        3.3510249, 3.4552405, 4.0257854, 4.088436, 3.4370995, 3.85842,
        4.637334, 3.5487218, 3.536237, 2.5578923, 3.342811, 3.8897269,
        4.70686, 3.3952892, 3.9706533, 4.504736, 1.4873443, 3.700363,
        4.1058283, 3.6298118, 5.0808263, 1.520838, 3.3888445, 3.9899964,
    ], dtype='float32').reshape(1, 24)
    self.H_weight = np.array([
        4.0984106, 4.102459, 4.0414762, 4.0414762, 3.9768186, 4.23686,
        5.3542085, 3.723717, 3.4699364, 2.4587274, 3.7167964, 4.08836,
        5.050695, 3.9077065, 4.534647, 3.4699364, 1.8585607, 3.9433942,
        3.9433942, 4.3523254, 5.138182, 1.7807873, 4.080392, 4.5761204,
    ], dtype='float32').reshape(1, 24)

    # Defined elsewhere in the class; invoked last, after every attribute
    # above has been assigned.
    self.reset_classes()
def __init__(self):
    """Initialise bookkeeping dicts, graph placeholders and the ResNet-50 block list."""
    # Containers populated later by the rest of the class.
    self.visualize = {}
    self.intermediate = {}
    self.predictions = {}
    self.score_summaries = {}
    self.event_summaries = {}
    self.train_summaries = []
    self.losses = {}

    float32 = tf.float32
    # scene stream
    self.image = tf.placeholder(float32, shape=[1, None, None, 3], name='image')
    # Human stream
    self.H_boxes = tf.placeholder(float32, shape=[None, 5], name='H_boxes')
    # PaSta stream
    self.P_boxes = tf.placeholder(float32, shape=[None, 10, 5], name='P_boxes')
    # target verb
    self.gt_verb = tf.placeholder(float32, shape=[None, 80], name='gt_class_verb')
    # Unnamed placeholder: must stay ahead of `self.lr` so the auto-generated
    # op names keep their original order.
    self.H_num = tf.placeholder(tf.int32)

    self.verb_weight = np.array(verb80, dtype='float32').reshape(1, -1)

    self.num_classes = 80  # HOI
    # Class counts per body part (pasta0..pasta5).
    self.num_pasta0 = 12  # ankle
    self.num_pasta1 = 10  # knee
    self.num_pasta2 = 5   # hip
    self.num_pasta3 = 31  # hand
    self.num_pasta4 = 5   # shoulder
    self.num_pasta5 = 13  # head
    self.num_fc = 1024
    self.scope = 'resnet_v1_50'
    self.stride = [16]
    self.lr = tf.placeholder(float32)

    # (name, base_depth, num_units, stride of the last unit) for each block.
    layout = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 1),
        ('block4', 512, 3, 1),
        ('block5', 512, 3, 1),
    )
    built = []
    if tf.__version__ == '1.1.0':
        # TF 1.1.0 describes every unit as (depth, depth_bottleneck, stride);
        # only the final unit of a block carries the block's stride.
        for name, base_depth, num_units, stride in layout:
            depth = 4 * base_depth
            units = [(depth, base_depth, 1)] * (num_units - 1)
            units = units + [(depth, base_depth, stride)]
            built.append(resnet_utils.Block(name, resnet_v1.bottleneck, units))
    else:
        # we use tf 1.2.0 here, Resnet-50
        from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
        for name, base_depth, num_units, stride in layout:
            built.append(resnet_v1_block(name, base_depth=base_depth,
                                         num_units=num_units, stride=stride))
    self.blocks = built
def _build_network(self, sess, is_training=True):
    """Build the ResNet backbone, RPN, RoI pooling and classification head.

    Args:
        sess: Not used by this method.
        is_training: Forwarded to ``resnet_arg_scope`` for the non-frozen
            blocks, to the region proposal layer and to the region
            classification layer.

    Returns:
        A ``(rois, cls_prob, bbox_pred)`` tuple.

    Raises:
        NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152,
            or if ``cfg.POOLING_MODE`` is not ``'crop'``.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # choose different blocks for different number of layers:
    # depth -> (num_units of block2, num_units of block3)
    units_by_depth = {50: (4, 6), 101: (4, 23), 152: (8, 36)}
    if self._num_layers not in units_by_depth:
        # other numbers are not supported
        raise NotImplementedError
    block2_units, block3_units = units_by_depth[self._num_layers]
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=block2_units, stride=2),
        # use stride 1 for the last conv4 layer
        resnet_v1_block('block3', base_depth=256, num_units=block3_units, stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
    ]

    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)

    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()
        base_output = net_conv

    # End points of block1..block3. They are merged across the frozen and the
    # trainable stage so that a block built in the frozen stage is still
    # reachable below, whatever cfg.RESNET.FIXED_BLOCKS is.
    end_points_initial = {}
    if cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, frozen_end_points = resnet_v1.resnet_v1(
                net_conv,
                blocks[0:cfg.RESNET.FIXED_BLOCKS],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
        end_points_initial.update(frozen_end_points)
    if cfg.RESNET.FIXED_BLOCKS < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, trainable_end_points = resnet_v1.resnet_v1(
                net_conv,
                blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                global_pool=False,
                include_root_block=False,
                scope=self._resnet_scope)
        end_points_initial.update(trainable_end_points)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
        else:
            raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, end_points_last = resnet_v1.resnet_v1(
            pool5,
            blocks[-1:],
            global_pool=False,
            include_root_block=False,
            scope=self._resnet_scope)

    # The end points are looked up under the scope handed to resnet_v1 above
    # instead of a hard-coded 'resnet_v1_152', so the 50- and 101-layer
    # variants resolve their own keys.
    # NOTE(review): assumes the end-point aliases are '<scope>/blockN' with
    # <scope> == self._resnet_scope, i.e. no enclosing variable scope.
    block2_key = '{}/block2'.format(self._resnet_scope)
    block3_key = '{}/block3'.format(self._resnet_scope)
    block4_key = '{}/block4'.format(self._resnet_scope)

    print("Layer dimensions")
    print(base_output.get_shape())
    print(end_points_initial[block2_key].get_shape())
    print(end_points_initial[block3_key].get_shape())

    # Spatially averaged base / block2 / block3 features, joined along axis 1.
    concatenated_features_lower = tf.concat(
        values=[
            tf.reduce_mean(base_output, axis=[1, 2]),
            tf.reduce_mean(end_points_initial[block2_key], axis=[1, 2]),
            tf.reduce_mean(end_points_initial[block3_key], axis=[1, 2]),
        ],
        axis=1)
    print("Concatenated lower features dimensions")
    feature_size = concatenated_features_lower.get_shape().dims[1].value
    print(feature_size)

    # NOTE(review): 256 is hard-coded here; presumably the number of RoIs per
    # image, so that the tiled rows line up with the block4 features - confirm.
    concatenated_features = tf.reshape(
        tf.tile(concatenated_features_lower, [1, 256]), [256, feature_size])
    print(concatenated_features.get_shape())
    concatenated_features = tf.concat(
        values=[
            concatenated_features,
            tf.reduce_mean(end_points_last[block4_key], axis=[1, 2]),
        ],
        axis=1)
    print(concatenated_features.get_shape())

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        print(fc7.get_shape())
        # NOTE(review): final_feature_vec is only printed; the classifier below
        # is still fed fc7. Left unchanged - confirm whether that is intended.
        final_feature_vec = tf.concat(values=[concatenated_features, fc7], axis=1)
        print("Final feature vector dimensions")
        print(final_feature_vec.get_shape())
        # region classification
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def _resnet_stream_features(img_batch, stream, org_scope_name, middle_num_units,
                            fixed_blocks, is_training):
    """Build one ResNet stream and return its C2..C5 end-point tensors.

    Args:
        img_batch: Input image tensor of this stream.
        stream: Scope prefix of the stream, ``'RGB'`` or ``'IR'``.
        org_scope_name: Backbone scope name passed to ``resnet_base``.
        middle_num_units: Number of units in block3.
        fixed_blocks: Number of leading blocks built with ``is_training=False``.
        is_training: Training flag for the remaining blocks.

    Returns:
        Dict mapping ``'C2'``..``'C5'`` to the selected bottleneck end point
        of block1..block4.
    """
    # Block scopes keep the literal 'resnet_v1_50' segment for every backbone
    # so that existing variable names do not change.
    block_prefix = '{}/resnet_v1_50'.format(stream)
    blocks = [resnet_v1_block(block_prefix + '/block1', base_depth=64, num_units=3, stride=2),
              resnet_v1_block(block_prefix + '/block2', base_depth=128, num_units=4, stride=2),
              resnet_v1_block(block_prefix + '/block3', base_depth=256,
                              num_units=middle_num_units, stride=2),
              resnet_v1_block(block_prefix + '/block4', base_depth=512, num_units=3, stride=1)]
    # when use fpn . stride list is [1, 2, 2]

    scope_name = stream + "/" + org_scope_name
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # Fixed_Blocks can be 1~3
    not_freezed = [False] * fixed_blocks + (4 - fixed_blocks) * [True]

    # Unit whose output is tapped in block1..block4.
    tap_units = (2, 3, middle_num_units - 1, 3)
    features = {}
    for idx, unit in enumerate(tap_units):
        if idx < 3:
            stage_training = (is_training and not_freezed[idx])
        else:
            # The last block always follows is_training directly.
            stage_training = is_training
        with slim.arg_scope(resnet_arg_scope(is_training=stage_training)):
            net, end_points = resnet_v1.resnet_v1(net,
                                                  blocks[idx:idx + 1],
                                                  global_pool=False,
                                                  include_root_block=False,
                                                  scope=scope_name)
        # End points are registered under '<resnet scope>/<block scope>/...'.
        # The resnet scope carries the requested backbone name while the block
        # scope always contains 'resnet_v1_50'; building the key in this order
        # keeps 'resnet_v1_50' identical to before and makes 'resnet_v1_101'
        # resolve instead of raising KeyError.
        key = '{}/{}/block{}/unit_{}/bottleneck_v1'.format(
            scope_name, block_prefix, idx + 1, unit)
        features['C{}'.format(idx + 2)] = end_points[key]
    return features


def resnet_base(rgb_img_batch, ir_img_batch, scope_name, is_training=True):
    """Build twin RGB/IR ResNet streams and a P3..P7 feature pyramid on top.

    Args:
        rgb_img_batch: Input tensor of the RGB stream.
        ir_img_batch: Input tensor of the IR stream.
        scope_name: ``'resnet_v1_50'`` or ``'resnet_v1_101'``.
        is_training: Training flag for the non-frozen backbone blocks.

    Returns:
        Dict with keys ``'P3'``..``'P7'`` holding the pyramid feature maps.

    Raises:
        NotImplementedError: If ``scope_name`` is not one of the two
            supported backbones.
    """
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. ')

    # RGB is built completely before IR, as before.
    rgb_features = _resnet_stream_features(rgb_img_batch, 'RGB', scope_name, middle_num_units,
                                           cfgs.RGB_FIXED_BLOCKS, is_training)
    ir_features = _resnet_stream_features(ir_img_batch, 'IR', scope_name, middle_num_units,
                                          cfgs.IR_FIXED_BLOCKS, is_training)

    # Fuse both streams by concatenating matching levels along axis 3.
    feature_dict = {}
    for level in ('C2', 'C3', 'C4', 'C5'):
        feature_dict[level] = tf.concat(axis=3,
                                        values=[rgb_features[level], ir_features[level]])

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None, normalizer_fn=None):

            conv_channels = 256
            last_fm = None
            # Top-down pass over C5, C4, C3.
            for i in range(3):
                level = 5 - i
                fm = feature_dict['C{}'.format(level)]
                fm_1x1_conv = slim.conv2d(fm, num_outputs=conv_channels, kernel_size=[1, 1],
                                          stride=1, scope='p{}_1x1_conv'.format(level))
                if last_fm is not None:
                    # Resize the coarser map to this level's size and add it.
                    h, w = tf.shape(fm_1x1_conv)[1], tf.shape(fm_1x1_conv)[2]
                    last_resize = tf.image.resize_bilinear(last_fm,
                                                           size=[h, w],
                                                           name='p{}_up2x'.format(level))
                    fm_1x1_conv = fm_1x1_conv + last_resize

                last_fm = fm_1x1_conv

                fm_3x3_conv = slim.conv2d(fm_1x1_conv,
                                          num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                                          stride=1, scope='p{}_3x3_conv'.format(level))
                pyramid_dict['P{}'.format(level)] = fm_3x3_conv

            # P6 and P7 are stride-2 3x3 convolutions stacked on P5.
            p6 = slim.conv2d(pyramid_dict['P5'],
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6)
            p7 = slim.conv2d(p7,
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p7_conv')
            pyramid_dict['P7'] = p7

    return pyramid_dict