# Example 1
    def __init__(self):
        """Set up placeholders, sizes and ResNet block definitions.

        Appears to build the inputs of a semi-supervised model over
        28x28x1 (MNIST-shaped) images — TODO confirm against callers.
        """
        # Sizes come from the global config; latent/hidden sizes are fixed.
        self._num_classes = cfg.NUM_CLASSES
        self._batch_size = cfg.TRAIN.BATCH_SIZE
        self._latent_size = 128
        self._hidden_size = 256
        # NOTE(review): 55000 looks like the MNIST training-set size and 100
        # a batch size, making this a labeled-loss weight — confirm.
        self._supervised_scaling_const = 0.1 * (55000 / 100)

        # Labeled and unlabeled image batches are fed separately and
        # concatenated along the batch axis into self._x.
        self._x_labeled = tf.placeholder(tf.float32, shape=[self._batch_size, 28, 28, 1])
        self._x_unlabeled = tf.placeholder(tf.float32, shape=[self._batch_size, 28, 28, 1])
        self._x = tf.concat([self._x_labeled, self._x_unlabeled], 0)
        self._y_labeled = tf.placeholder(tf.float32, shape=[self._batch_size, self._num_classes])
        # generate_y presumably enumerates candidate labels for the
        # unlabeled batch — TODO confirm against its definition.
        self._y_all, self.y_unlabeled = self.generate_y(self._y_labeled)

        self._losses = {}

        self._initializer = self.define_initializer()
        # Encoder block plus two decoder variants ('valid' / 'same' padded
        # transposed-conv bottlenecks).
        self._blocks_encoder = [resnet_utils.Block('block4', bottleneck, [(256, 128, 1)] * 3)]
        self._blocks_decoder_valid = [resnet_utils.Block('block5', bottleneck_trans_valid,
                                                         [(256, 128, 1), (256, 128, 2)])]
        self._blocks_decoder_same = [resnet_utils.Block('block5', bottleneck_trans_same,
                                                        [(256, 128, 2), (256, 128, 2)])]
        self._resnet_scope = 'resnet_v1_%d' % 101

        # Repeat the unlabeled batch once per class so each copy can be
        # paired with one candidate label (batch grows by a factor of
        # num_classes).
        x_unlabeled_tiled = tf.tile(self._x_unlabeled, [self._num_classes, 1, 1, 1])
        self.outputs = {'labeled': {'x_in': self._x_labeled}, 'unlabeled': {'x_in': x_unlabeled_tiled}}
# Example 2
def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v2_200'):
  """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
  # (name, depth, depth_bottleneck, repeats, downsample): the first three
  # stages end with a stride-2 unit; the last stage keeps stride 1.
  stage_specs = [
      ('block1', 256, 64, 2, True),
      ('block2', 512, 128, 23, True),
      ('block3', 1024, 256, 35, True),
      ('block4', 2048, 512, 3, False),
  ]
  blocks = []
  for name, depth, depth_bottleneck, repeats, downsample in stage_specs:
    units = [(depth, depth_bottleneck, 1)] * repeats
    if downsample:
      units = units + [(depth, depth_bottleneck, 2)]
    blocks.append(resnet_utils.Block(name, bottleneck, units))
  return resnet_v2(
      inputs,
      blocks,
      num_classes,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)
# Example 3
 def testEndPointsV1(self):
   """Test the end points of a tiny v1 bottleneck network."""
   bottleneck = resnet_v1.bottleneck
   blocks = [
       resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
       resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])
   ]
   inputs = create_test_input(2, 32, 16, 3)
   with arg_scope(resnet_utils.resnet_arg_scope()):
     _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
   # Every unit exposes conv1-conv3 plus a BatchNorm on conv3; only the
   # first unit of each block adds a projection shortcut (+ BatchNorm).
   expected = []
   for block in ('block1', 'block2'):
     for unit in ('unit_1', 'unit_2'):
       prefix = 'tiny/%s/%s/bottleneck_v1' % (block, unit)
       if unit == 'unit_1':
         expected.append(prefix + '/shortcut')
         expected.append(prefix + '/shortcut/BatchNorm')
       expected.extend([prefix + '/conv1',
                        prefix + '/conv2',
                        prefix + '/conv3',
                        prefix + '/conv3/BatchNorm'])
   self.assertItemsEqual(expected, end_points)
# Example 4
    def __init__(self, is_training):
        """Create placeholders, hyper-parameters and ResNet-50 blocks.

        Appears to be a human-object-interaction model (26 verb classes)
        with a word-embedding / graph branch — TODO confirm against
        callers. Finishes by calling self.build_all().

        Args:
            is_training: bool, stored in self.train and used by the build
                methods.
        """
        self.name = self.__class__.__name__.lower()
        # Containers filled in later by the build methods.
        self.vars = {}      
        self.layers = []
        self.activations = []  
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.losses = {}
        self.score_summaries = {}
        self.event_summaries = {}
        
        
        # Inputs: one image plus human (H) / object (O) ROIs; *_enc are
        # 4-column encoded boxes (presumably without the batch index —
        # TODO confirm).
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.H_boxes     = tf.placeholder(tf.float32, shape=[None , 5], name = 'H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None , 5], name = 'O_boxes')
        self.H_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'H_boxes_enc')
        self.O_boxes_enc = tf.placeholder(tf.float32, shape=[None , 4], name = 'O_boxes_enc')
        self.HO_boxes_enc= tf.placeholder(tf.float32, shape=[None , 4], name = 'HO_boxes_enc')
        # Ground-truth labels and per-sample masks (one column per class).
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 26], name = 'gt_class_HO')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 26], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 26], name = 'H_mask')
        self.H_num       = tf.placeholder(tf.int32) # presumably the count of positive samples — confirm
        self.ivs         = tf.placeholder(tf.int32, shape=[26], name = 'idx_GT_verbs')
        # GCN / embedding branch inputs: 300-d word embeddings plus a
        # sparse adjacency support.
        self.inputs      = tf.placeholder(tf.float32, shape=[None, 300], name = 'embedding')         
        self.support     = [tf.sparse_placeholder(tf.float32) for _ in range(1)]
        self.num_nonzero = tf.placeholder(tf.int32)
        self.in_dim      = 300
        self.hidden_dim  = 512
        self.out_dim     = 512
        self.num_classes = 26
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.train       = is_training 
        
        self.now_lr      = None
        self.optimizer   = None
        self.opt_op      = None        
        # The Block signature differs across TF versions; newer releases
        # ship a resnet_v1_block helper instead.
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]

        self.build_all()
    def __init__(self):
        """Create placeholders, fixed class weights and ResNet-50 blocks.

        Appears to be an HOI model over 29 interaction classes with an
        extra binary interactiveness head — TODO confirm against callers.
        """
        # Containers filled in later by the build methods.
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        # Inputs: one image, a 64x64x3 spatial pattern and human/object ROIs.
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp') # Pattern.reshape( num_pos_neg, 64, 64, 3) 
        self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes') #Object_augmented[:num_pos].reshape(num_pos, 5)
        # Ground-truth labels and per-sample masks, one column per class.
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
        self.H_num       = tf.placeholder(tf.int32)
        # Fixed per-class loss weights; presumably inverse-frequency values
        # precomputed on the training set — TODO confirm their origin.
        self.HO_weight   = np.array([3.3510249, 3.4552405, 4.0257854, 0.0, 4.088436, 
                                    3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237, 
                                    2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892, 
                                    3.9706533, 4.504736, 0.0, 1.4873443, 3.700363, 
                                    4.1058283, 3.6298118, 0.0, 6.490651, 5.0808263, 
                                    1.520838, 3.3888445, 0.0, 3.9899964], dtype = 'float32').reshape(1,29)
        self.H_weight   = np.array([4.0984106, 4.102459, 4.0414762, 4.060745, 4.0414762, 
                                    3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364, 
                                    2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065, 
                                    4.534647, 3.4699364, 2.9466882, 1.8585607, 3.9433942, 
                                    3.9433942, 4.3523254, 3.8368235, 6.4963055, 5.138182, 
                                    1.7807873, 4.080392, 1.9544303, 4.5761204],dtype = 'float32').reshape(1,29)
        self.binary_weight = np.array([1.0986122886681098, 0.4054651081081644], dtype = 'float32').reshape(1,2)
        self.num_classes = 29
        self.num_binary  = 2 # existence (0 or 1) of HOI
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        # The Block signature differs across TF versions; newer releases
        # ship a resnet_v1_block helper instead.
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
# Example 6
def resnet_v1_block(scope, base_depth, num_units, stride, rate=1):
    """Helper function for creating a resnet_v1 bottleneck block.
    downsampling(stride=2) is done in each stage's first block!
    Args:
        scope: The scope of the block.
        base_depth: The depth of the bottleneck layer for each unit.
        num_units: The number of units in the block.
        stride: The stride of the block, applied by the first unit; all
        remaining units use stride=1.
        rate: Dilation rate of the first unit; the rest use rate=1.

    Returns:
        A resnet_v1 bottleneck block.
    """
    def _unit(unit_stride, unit_rate):
        # One bottleneck unit description.
        return {
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': unit_stride,
            'rate': unit_rate,
        }

    units = [_unit(stride, rate)]
    units += [_unit(1, 1) for _ in range(num_units - 1)]
    return resnet_utils.Block(scope, bottleneck, units)
# Example 7
    def _atrousValues(self, bottleneck):
        """Verify the values of dense feature extraction by atrous convolution.

        Make sure that dense feature extraction by stack_blocks_dense()
        followed by subsampling gives identical results to feature extraction
        at the nominal network output stride using the simple
        self._stack_blocks_nondense() above.

        Args:
            bottleneck: The bottleneck function.
        """
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
            resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
            resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
            resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
        ]
        # Three stride-2 units overall -> nominal output stride of 8.
        nominal_stride = 8

        # Test both odd and even input dimensions.
        height = 30
        width = 31
        with arg_scope(resnet_utils.resnet_arg_scope(is_training=False)):
            for output_stride in [1, 2, 4, 8, None]:
                with ops.Graph().as_default():
                    with self.test_session() as sess:
                        # Fixed seed so both extraction paths see identical
                        # random initial weights.
                        random_seed.set_random_seed(0)
                        inputs = create_test_input(1, height, width, 3)
                        # Dense feature extraction followed by subsampling.
                        output = resnet_utils.stack_blocks_dense(
                            inputs, blocks, output_stride)
                        if output_stride is None:
                            factor = 1
                        else:
                            factor = nominal_stride // output_stride

                        output = resnet_utils.subsample(output, factor)
                        # Make the two networks use the same weights.
                        variable_scope.get_variable_scope().reuse_variables()
                        # Feature extraction at the nominal network rate.
                        expected = self._stack_blocks_nondense(inputs, blocks)
                        sess.run(variables.global_variables_initializer())
                        output, expected = sess.run([output, expected])
                        self.assertAllClose(output,
                                            expected,
                                            atol=1e-4,
                                            rtol=1e-4)
def resnet_v1_block(scope, base_depth, num_units, stride):
  """Create a resnet_v1 block of plain (non-bottleneck) units.

  The first unit applies `stride`; the remaining `num_units - 1` units
  use stride 1.
  """
  first = {'depth': base_depth, 'stride': stride}
  rest = [{'depth': base_depth, 'stride': 1} for _ in range(num_units - 1)]
  return resnet_utils.Block(scope, not_bottleneck, [first] + rest)
# Example 9
def resnet50V2_reduced(inputs,
                       is_training=True,
                       output_stride=None,
                       include_root_block=True,
                       reuse=None,
                       scope=None):
    """Build a reduced ResNet-50 v2 backbone (blocks 1-3 only, no block4,
    no logits/global pool) and return the post-normalized feature map.

    Args:
        inputs: input image tensor.
        is_training: whether batch norm runs in training mode.
        output_stride: desired output stride for dense prediction, or None;
            must be a multiple of 4 when the root block is included.
        include_root_block: whether to apply the initial 7x7 conv + max pool.
        reuse: variable-scope reuse flag.
        scope: optional variable scope name (defaults to 'resnet_v2_50').

    Returns:
        The feature tensor after the final batch norm + ReLU ('postnorm').
    """
    # These are the blocks for resnet 50
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5)
    ]

    # Initialize Model
    with tf.variable_scope(scope, 'resnet_v2_50', [inputs], reuse=reuse):
        # NOTE(review): this arg_scope sets no arguments for the listed ops;
        # it looks like a leftover from the full resnet_v2 implementation.
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense]):
            # NOTE(review): `as scope` rebinds the `scope` parameter here.
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training) as scope:
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block already downsamples by 4.
                        output_stride /= 4
                    # Root conv runs without activation/normalization; BN+ReLU
                    # happen inside the v2 (pre-activation) units.
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
    # NOTE(review): postnorm is applied OUTSIDE the variable_scope above, so
    # its variables live at the top level rather than under the model scope
    # — confirm this matches the checkpoint layout you restore from.
    with slim.arg_scope([slim.batch_norm], is_training=is_training) as scope:
        net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
    return net
 def resnet_v1_block(scope, bottleneck, base_depth, num_units, stride):
   """Build a resnet_v1 bottleneck Block: `num_units - 1` stride-1 units
   followed by one final unit carrying `stride`.
   """
   def _unit(s):
     return {
         'depth': base_depth * 4,
         'depth_bottleneck': base_depth,
         'stride': s
     }
   return resnet_utils.Block(
       scope, bottleneck, [_unit(1)] * (num_units - 1) + [_unit(stride)])
# Example 11
 def _resnet_small(self,
                   inputs,
                   num_classes=None,
                   global_pool=True,
                   output_stride=None,
                   include_root_block=True,
                   reuse=None,
                   scope='resnet_v1_small'):
   """A shallow and thin ResNet v1 for faster tests."""
   bottleneck = resnet_v1.bottleneck
   # (name, depth, depth_bottleneck, has a stride-2 tail unit).
   specs = [('block1', 4, 1, True),
            ('block2', 8, 2, True),
            ('block3', 16, 4, True),
            ('block4', 32, 8, False)]
   blocks = []
   for name, depth, depth_bn, strided in specs:
     units = [(depth, depth_bn, 1)] * 2
     if strided:
       units = units + [(depth, depth_bn, 2)]
     blocks.append(resnet_utils.Block(name, bottleneck, units))
   return resnet_v1.resnet_v1(inputs, blocks, num_classes, global_pool,
                              output_stride, include_root_block, reuse, scope)
# Example 12
    def build_network(self):
        """Build the ResNet backbone up to conv4 (blocks 1-3).

        cfg.RESNET.FIXED_BLOCKS controls how many leading blocks are built
        with trainable=False (frozen). block4 is excluded in every branch
        (blocks[...:-1] / blocks[0:3]); it is presumably consumed later as
        the head — TODO confirm.

        Returns:
            net_conv4: the conv4 feature map tensor.
        """
        # select initializers

        bottleneck = resnet_v1.bottleneck
        blocks = [resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                  resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                  resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                  resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)]

        assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        if cfg.RESNET.FIXED_BLOCKS == 3:
            # All of blocks 1-3 frozen (blocks[0:3] already excludes block4).
            with slim.arg_scope(resnet_arg_scope(trainable=False)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                    blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=self._resnet_scope)
        elif cfg.RESNET.FIXED_BLOCKS > 0:
            # First FIXED_BLOCKS frozen, remaining blocks (up to conv4)
            # trainable; both calls share self._resnet_scope so the variable
            # names line up with the pretrained checkpoint.
            with slim.arg_scope(resnet_arg_scope(trainable=False)):
                net = self.build_base()
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)

            with slim.arg_scope(resnet_arg_scope()):
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=self._resnet_scope)
        else:    # cfg.RESNET.FIXED_BLOCKS == 0
            # Everything trainable.
            with slim.arg_scope(resnet_arg_scope()):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                    blocks[0:-1],
                                                    global_pool=False,
                                                    include_root_block=False,
                                                    scope=self._resnet_scope)
        return net_conv4
# Example 13
def resnet_v2_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v2 bottleneck block."""
    # num_units - 1 stride-1 units, then a final unit carrying `stride`.
    def _unit(s):
        return {
            'depth': base_depth * 4,
            'depth_bottleneck': base_depth,
            'stride': s
        }
    units = [_unit(1) for _ in range(num_units - 1)]
    units.append(_unit(stride))
    return resnet_utils.Block(scope, bottleneck, units)
# Example 14
    def __init__(self):
        """Create placeholders, hyper-parameters and ResNet-50 blocks.

        Appears to be an HOI model over 29 interaction classes with a
        64x64x2 spatial-pattern branch — TODO confirm against callers.
        """
        # Containers filled in later by the build methods.
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        # Inputs: one image, a two-channel spatial pattern and human/object
        # ROIs (5 columns: presumably batch index + box — confirm).
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 2], name = 'sp')
        self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        # Ground-truth labels and per-sample masks, one column per class.
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
        self.H_num       = tf.placeholder(tf.int32)
        self.num_classes = 29
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        # The Block signature differs across TF versions; newer releases
        # ship a resnet_v1_block helper instead.
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
# Example 15
def resnet_v1_block(scope, base_depth, num_units, stride):
    """Helper function for creating a resnet_v1 block of `naive` units.
  Args:
    scope: The scope of the block.
    base_depth: The depth of each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.
  Returns:
    A resnet_v1 block of `naive` units.
  """
    body = [{'depth': base_depth, 'stride': 1}] * (num_units - 1)
    tail = [{'depth': base_depth, 'stride': stride}]
    return resnet_utils.Block(scope, naive, body + tail)
# Example 16
def resnet_v1_block(scope,
                    base_depth,
                    num_units,
                    stride,
                    deformable_units=0,
                    rate=1):
    """Helper function for creating a resnet_v1 bottleneck block.
    downsampling(stride=2) is done in each stage's first block!
    Args:
        scope: The scope of the block.
        base_depth: The depth of the bottleneck layer for each unit.
        num_units: The number of units in the block.
        stride: The stride of the block, implemented as a stride in the
        first unit. All other units have stride=1.
        deformable_units: Number of trailing units built with deformable
        convolutions.
        rate: Dilation rate of the first unit; the rest use rate=1.

    Returns:
        A resnet_v1 bottleneck block.

    Raises:
        ValueError: If deformable_units exceeds num_units.
    """
    # Validate with a real exception: `assert` is stripped under `python -O`
    # and would let an invalid configuration through silently.
    if num_units < deformable_units:
        raise ValueError(
            'deformable_units (%d) must not exceed num_units (%d)'
            % (deformable_units, num_units))
    regular_units = num_units - deformable_units

    # First unit carries the block's stride/rate; then stride-1 regular
    # units; then the deformable tail units.
    # NOTE: when deformable_units == num_units the strided first unit is
    # still emitted (matching the original behavior).
    first = [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': stride,
        'rate': rate,
        'deformable': False
    }]
    middle = [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1,
        'rate': 1,
        'deformable': False
    }] * max(regular_units - 1, 0)
    tail = [{
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': 1,
        # TODO: set all deformables's rate to be 2!
        'rate': 1,
        'deformable': True
    }] * deformable_units
    return resnet_utils.Block(scope, deformable_bottleneck,
                              first + middle + tail)
# Example 17
def resnet_v2_block(scope, base_depth, num_units, stride):
  """Helper function for creating a resnet_v2 bottleneck block.

  Args:
    scope: The scope of the block.
    base_depth: The depth of the bottleneck layer for each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.
      Total layer count = (sum of residual units) * 3 + 2: each residual unit
      contributes 3 conv layers, plus one input and one output layer
      (e.g. ResNet-50: 50 = 16*3 + 2).

  Returns:
    A resnet_v2 bottleneck block.
  """
  def _unit(unit_stride):
    # One bottleneck-unit spec; output depth is 4x the bottleneck depth.
    return {
        'depth': base_depth * 4,
        'depth_bottleneck': base_depth,
        'stride': unit_stride
    }

  units = [_unit(1) for _ in range(num_units - 1)] + [_unit(stride)]
  return resnet_utils.Block(scope, bottleneck, units)
Beispiel #18
0
    def build_network(self, sess, is_training=True):
        """Build the full detection graph: ResNet backbone, RPN, and RCNN heads.

        Args:
            sess: TensorFlow session (unused here; kept for interface parity).
            is_training: Whether to build the training variant (proposal
                target sampling, trainable conv layers).

        Returns:
            Tuple of (rois, cls_prob, bbox_pred) tensors.
        """
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)
        bottleneck = resnet_v1.bottleneck
        # choose different blocks for different number of layers
        # Block tuples are (depth, depth_bottleneck, stride).
        if self._num_layers == 50:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 101:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 152:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 7 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        else:
            # other numbers are not supported
            raise NotImplementedError

        # Freeze the first FIXED_BLOCKS stages (is_training=False) and train
        # the remainder; block4 (blocks[-1]) is always applied later on the
        # pooled ROIs, never here.
        assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        if cfg.RESNET.FIXED_BLOCKS == 3:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[0:cfg.RESNET.FIXED_BLOCKS],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        elif cfg.RESNET.FIXED_BLOCKS > 0:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)

            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        else:  # cfg.RESNET.FIXED_BLOCKS == 0
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                   blocks[0:-1],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope=self._resnet_scope)
        self._act_summaries.append(net_conv4)
        self._layers['head'] = net_conv4

        # NOTE(review): dead branch (`if False`) — disabled noise/SRM stream.
        # `Wcnn` is not defined anywhere in this method, so enabling this
        # as-is would raise NameError; the filter setup exists only in the
        # other build_network variant in this file.
        if False:
            with tf.variable_scope('noise'):
                #kernel = tf.get_variable('weights',
                #shape=[5, 5, 3, 3],
                #initializer=tf.constant_initializer(c))
                conv = tf.nn.conv2d(self.noise,
                                    Wcnn, [1, 1, 1, 1],
                                    padding='SAME',
                                    name='srm')
            self._layers['noise'] = conv
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                #srm_conv = tf.nn.tanh(conv, name='tanh')
                noise_net = resnet_utils.conv2d_same(conv,
                                                     64,
                                                     7,
                                                     stride=2,
                                                     scope='conv1')
                noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
                noise_net = slim.max_pool2d(noise_net, [3, 3],
                                            stride=2,
                                            padding='VALID',
                                            scope='pool1')
                #net_sum=tf.concat(3,[net_conv4,noise_net])
                noise_conv4, _ = resnet_v1.resnet_v1(noise_net,
                                                     blocks[0:-1],
                                                     global_pool=False,
                                                     include_root_block=False,
                                                     scope='noise')
        # Region Proposal Network on top of the conv4 feature map.
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # build the anchors for the image
            self._anchor_component()

            # rpn
            rpn = slim.conv2d(net_conv4,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')
            # Training: sample proposals against ground truth; testing: keep
            # proposals chosen by NMS or top-K scoring.
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois")
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
                # Try to have a determinestic order for the computing graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError
            # rcnn
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
                #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
            else:
                raise NotImplementedError
        # NOTE(review): dead branch (`if False`) — would pool the (also
        # disabled) noise stream; `noise_conv4` is undefined when the earlier
        # noise branch is off.
        if False:
            noise_pool5 = self._crop_pool_layer(noise_conv4, rois,
                                                "noise_pool5")
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5,
                                                   blocks[-1:],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope='noise')
        # Apply the final ResNet stage (block4) on the pooled ROI features.
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, _ = resnet_v1.resnet_v1(pool5,
                                         blocks[-1:],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        self._layers['fc7'] = fc7
        # RCNN heads: per-ROI classification scores and bbox regression.
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            #pdb.set_trace()
            #noise_fc7 = tf.reduce_mean(noise_fc7, axis=[1, 2])
            #bilinear_pool=compact_bilinear_pooling_layer(fc7,noise_fc7,2048*4,compute_size=16,sequential=False)
            #bilinear_pool=tf.reshape(bilinear_pool, [-1,2048*4])
            # Global average pooling over the spatial dims of the ROI feature.
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            cls_score = slim.fully_connected(fc7,
                                             self._num_classes,
                                             weights_initializer=initializer,
                                             trainable=is_training,
                                             activation_fn=None,
                                             scope='cls_score')
            #pdb.set_trace()
            #noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes, weights_initializer=initializer,
            #trainable=is_training, activation_fn=None, scope='noise_cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')
        #with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean
        #fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        #fc_con=tf.concat(1,[fc7,noise_fc])
        #cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
        #trainable=False, activation_fn=None, scope='cls_score')
        #cls_score1=cls_score+10*noise_cls_score
        #cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
        #bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
        #trainable=False,
        #activation_fn=None, scope='bbox_pred')
        # Expose all intermediate predictions for loss computation/summaries.
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
Beispiel #19
0
    def __init__(self):
        """Set up placeholders, loss weights, and the ResNet-50 block layout
        for a human-object-interaction (HOI) detection network with 600 HOI
        classes plus a binary interaction/no-interaction head.
        """
        # Containers for intermediate tensors, predictions and summaries.
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        # Inputs: one image per batch; human/object boxes are [N, 5]
        # (presumably [batch_idx, x1, y1, x2, y2] — TODO confirm against the
        # feeding code); spatial is a 64x64x3 human-object layout map.
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
        self.H_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name = 'gt_class_HO')
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
        self.H_num       = tf.placeholder(tf.int32)
        # Per-HOI-class loss weights (1 x 600); presumably inverse-frequency
        # class-balancing weights — TODO confirm their derivation.
        self.HO_weight   = np.array([
                9.192927, 9.778443, 10.338059, 9.164914, 9.075144, 10.045923, 8.714437, 8.59822, 12.977117, 6.2745423, 
                11.227917, 6.765012, 9.436157, 9.56762, 11.0675745, 11.530198, 9.609821, 9.897503, 6.664475, 6.811699, 
                6.644726, 9.170454, 13.670264, 3.903943, 10.556748, 8.814335, 9.519224, 12.753973, 11.590822, 8.278912, 
                5.5245695, 9.7286825, 8.997436, 10.699849, 9.601237, 11.965516, 9.192927, 10.220277, 6.056692, 7.734048, 
                8.42324, 6.586457, 6.969533, 10.579222, 13.670264, 4.4531965, 9.326459, 9.288238, 8.071842, 10.431585, 
                12.417501, 11.530198, 11.227917, 4.0678477, 8.854023, 12.571651, 8.225684, 10.996116, 11.0675745, 10.100731, 
                7.0376034, 7.463688, 12.571651, 14.363411, 5.4902234, 11.0675745, 14.363411, 8.45805, 10.269067, 9.820116, 
                14.363411, 11.272368, 11.105314, 7.981595, 9.198626, 3.3284247, 14.363411, 12.977117, 9.300817, 10.032678, 
                12.571651, 10.114916, 10.471591, 13.264799, 14.363411, 8.01953, 10.412168, 9.644913, 9.981384, 7.2197933, 
                14.363411, 3.1178555, 11.031207, 8.934066, 7.546675, 6.386472, 12.060826, 8.862153, 9.799063, 12.753973, 
                12.753973, 10.412168, 10.8976755, 10.471591, 12.571651, 9.519224, 6.207762, 12.753973, 6.60636, 6.2896967, 
                4.5198326, 9.7887, 13.670264, 11.878505, 11.965516, 8.576513, 11.105314, 9.192927, 11.47304, 11.367679, 
                9.275815, 11.367679, 9.944571, 11.590822, 10.451388, 9.511381, 11.144535, 13.264799, 5.888291, 11.227917, 
                10.779892, 7.643191, 11.105314, 9.414651, 11.965516, 14.363411, 12.28397, 9.909063, 8.94731, 7.0330057, 
                8.129001, 7.2817025, 9.874775, 9.758241, 11.105314, 5.0690055, 7.4768796, 10.129305, 9.54313, 13.264799, 
                9.699972, 11.878505, 8.260853, 7.1437693, 6.9321113, 6.990665, 8.8104515, 11.655361, 13.264799, 4.515912, 
                9.897503, 11.418972, 8.113436, 8.795067, 10.236277, 12.753973, 14.363411, 9.352776, 12.417501, 0.6271591, 
                12.060826, 12.060826, 12.166186, 5.2946343, 11.318889, 9.8308115, 8.016022, 9.198626, 10.8976755, 13.670264, 
                11.105314, 14.363411, 9.653881, 9.503599, 12.753973, 5.80546, 9.653881, 9.592727, 12.977117, 13.670264, 
                7.995224, 8.639826, 12.28397, 6.586876, 10.929424, 13.264799, 8.94731, 6.1026597, 12.417501, 11.47304, 
                10.451388, 8.95624, 10.996116, 11.144535, 11.031207, 13.670264, 13.670264, 6.397866, 7.513285, 9.981384, 
                11.367679, 11.590822, 7.4348736, 4.415428, 12.166186, 8.573451, 12.977117, 9.609821, 8.601359, 9.055143, 
                11.965516, 11.105314, 13.264799, 5.8201604, 10.451388, 9.944571, 7.7855496, 14.363411, 8.5463, 13.670264, 
                7.9288645, 5.7561946, 9.075144, 9.0701065, 5.6871653, 11.318889, 10.252538, 9.758241, 9.407584, 13.670264, 
                8.570397, 9.326459, 7.488179, 11.798462, 9.897503, 6.7530537, 4.7828183, 9.519224, 7.6492405, 8.031909, 
                7.8180614, 4.451856, 10.045923, 10.83705, 13.264799, 13.670264, 4.5245686, 14.363411, 10.556748, 10.556748, 
                14.363411, 13.670264, 14.363411, 8.037262, 8.59197, 9.738439, 8.652985, 10.045923, 9.400566, 10.9622135, 
                11.965516, 10.032678, 5.9017305, 9.738439, 12.977117, 11.105314, 10.725825, 9.080208, 11.272368, 14.363411, 
                14.363411, 13.264799, 6.9279733, 9.153925, 8.075553, 9.126969, 14.363411, 8.903826, 9.488214, 5.4571533, 
                10.129305, 10.579222, 12.571651, 11.965516, 6.237189, 9.428937, 9.618479, 8.620408, 11.590822, 11.655361, 
                9.968962, 10.8080635, 10.431585, 14.363411, 3.796231, 12.060826, 10.302968, 9.551227, 8.75394, 10.579222, 
                9.944571, 14.363411, 6.272396, 10.625742, 9.690582, 13.670264, 11.798462, 13.670264, 11.724354, 9.993963, 
                8.230013, 9.100721, 10.374427, 7.865129, 6.514087, 14.363411, 11.031207, 11.655361, 12.166186, 7.419324, 
                9.421769, 9.653881, 10.996116, 12.571651, 13.670264, 5.912144, 9.7887, 8.585759, 8.272101, 11.530198, 8.886948, 
                5.9870906, 9.269661, 11.878505, 11.227917, 13.670264, 8.339964, 7.6763024, 10.471591, 10.451388, 13.670264, 
                11.185357, 10.032678, 9.313555, 12.571651, 3.993144, 9.379805, 9.609821, 14.363411, 9.709451, 8.965248, 
                10.451388, 7.0609145, 10.579222, 13.264799, 10.49221, 8.978916, 7.124196, 10.602211, 8.9743395, 7.77862, 
                8.073695, 9.644913, 9.339531, 8.272101, 4.794418, 9.016304, 8.012526, 10.674532, 14.363411, 7.995224, 
                12.753973, 5.5157638, 8.934066, 10.779892, 7.930471, 11.724354, 8.85808, 5.9025764, 14.363411, 12.753973, 
                12.417501, 8.59197, 10.513264, 10.338059, 14.363411, 7.7079706, 14.363411, 13.264799, 13.264799, 10.752493, 
                14.363411, 14.363411, 13.264799, 12.417501, 13.670264, 6.5661197, 12.977117, 11.798462, 9.968962, 12.753973, 
                11.47304, 11.227917, 7.6763024, 10.779892, 11.185357, 14.363411, 7.369478, 14.363411, 9.944571, 10.779892, 
                10.471591, 9.54313, 9.148476, 10.285873, 10.412168, 12.753973, 14.363411, 6.0308623, 13.670264, 10.725825, 
                12.977117, 11.272368, 7.663911, 9.137665, 10.236277, 13.264799, 6.715625, 10.9622135, 14.363411, 13.264799, 
                9.575919, 9.080208, 11.878505, 7.1863923, 9.366199, 8.854023, 9.874775, 8.2857685, 13.670264, 11.878505, 
                12.166186, 7.616999, 9.44343, 8.288065, 8.8104515, 8.347254, 7.4738197, 10.302968, 6.936267, 11.272368, 
                7.058223, 5.0138307, 12.753973, 10.173757, 9.863602, 11.318889, 9.54313, 10.996116, 12.753973, 7.8339925, 
                7.569945, 7.4427395, 5.560738, 12.753973, 10.725825, 10.252538, 9.307165, 8.491293, 7.9161053, 7.8849015, 
                7.782772, 6.3088884, 8.866243, 9.8308115, 14.363411, 10.8976755, 5.908519, 10.269067, 9.176025, 9.852551, 
                9.488214, 8.90809, 8.537411, 9.653881, 8.662968, 11.965516, 10.143904, 14.363411, 14.363411, 9.407584, 
                5.281472, 11.272368, 12.060826, 14.363411, 7.4135547, 8.920994, 9.618479, 8.891141, 14.363411, 12.060826, 
                11.965516, 10.9622135, 10.9622135, 14.363411, 5.658909, 8.934066, 12.571651, 8.614018, 11.655361, 13.264799, 
                10.996116, 13.670264, 8.965248, 9.326459, 11.144535, 14.363411, 6.0517673, 10.513264, 8.7430105, 10.338059, 
                13.264799, 6.878481, 9.065094, 8.87035, 14.363411, 9.92076, 6.5872955, 10.32036, 14.363411, 9.944571, 
                11.798462, 10.9622135, 11.031207, 7.652888, 4.334878, 13.670264, 13.670264, 14.363411, 10.725825, 12.417501, 
                14.363411, 13.264799, 11.655361, 10.338059, 13.264799, 12.753973, 8.206432, 8.916674, 8.59509, 14.363411, 
                7.376845, 11.798462, 11.530198, 11.318889, 11.185357, 5.0664344, 11.185357, 9.372978, 10.471591, 9.6629305, 
                11.367679, 8.73579, 9.080208, 11.724354, 5.04781, 7.3777695, 7.065643, 12.571651, 11.724354, 12.166186, 
                12.166186, 7.215852, 4.374113, 11.655361, 11.530198, 14.363411, 6.4993753, 11.031207, 8.344818, 10.513264, 
                10.032678, 14.363411, 14.363411, 4.5873594, 12.28397, 13.670264, 12.977117, 10.032678, 9.609821
            ], dtype = 'float32').reshape(1,600)
        # Class-balancing weights for the binary interaction head; the values
        # look like -log of class priors (ln 5 and ln 1.25) — TODO confirm.
        self.binary_weight = np.array([1.6094379124341003, 0.22314355131420976], dtype = 'float32').reshape(1,2)
        self.num_classes = 600 # HOI
        self.num_binary  = 2 # existence (0 or 1) of HOI
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        # tf 1.1.0 has no resnet_v1_block helper, so spell out the Block
        # specs; tuples are (depth, depth_bottleneck, stride). Note block5 is
        # an extra copy of block4 used as a second ROI head.
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else: # we use tf 1.2.0 here, Resnet-50
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2), # a resnet_v1 bottleneck block
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1), # feature former
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
Beispiel #20
0
  def build_network(self, sess, is_training=True):
    """Build a two-stream detection graph: an RGB ResNet stream plus an SRM
    noise-residual stream, fused per-ROI by compact bilinear pooling for
    classification (bbox regression uses the RGB stream only).

    Args:
      sess: TensorFlow session (unused here; kept for interface parity).
      is_training: Whether to build the training variant.

    Returns:
      Tuple of (rois, cls_prob, bbox_pred) tensors.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      #initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer = tf.contrib.layers.xavier_initializer()
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    # Block tuples are (depth, depth_bottleneck, stride).
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError

    # Freeze the first FIXED_BLOCKS stages; block4 (blocks[-1]) is applied
    # later on the pooled ROIs, never here.
    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net, _ = resnet_v1.resnet_v1(net,
                                     blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    # Build three fixed 5x5 high-pass residual filters (used below as the
    # 'srm' conv kernel); presumably the SRM noise filters from steganalysis
    # literature — TODO confirm against the reference implementation.
    c=np.zeros((3,5,5))
    c[0]=[[-1,2,-2,2,-1],[2,-6,8,-6,2],[-2,8,-12,8,-2],[2,-6,8,-6,2],[-1,2,-2,2,-1]]
    c[0]=c[0]/12

    c[1][1][1]=-1
    c[1][1][2]=2
    c[1][1][3]=-1
    c[1][2][1]=2
    c[1][2][2]=-4
    c[1][2][3]=2
    c[1][3][1]=-1
    c[1][3][2]=2
    c[1][3][3]=-1
    c[1]=c[1]/4

    c[2][2][1]=1
    c[2][2][2]=-2
    c[2][2][3]=1
    c[2]=c[2]/2

    # Replicate each filter across the 3 input channels: Wcnn is the HWIO
    # kernel [5, 5, in=3, out=3] for the fixed (non-trainable) conv below.
    Wcnn=np.zeros((5,5,3,3))
    for i in range(3):
      #k=i%10+1
      #Wcnn[i]=[c[3*k-3],c[3*k-2],c[3*k-1]]
      Wcnn[:,:,0,i]=c[i]
      Wcnn[:,:,1,i]=c[i]
      Wcnn[:,:,2,i]=c[i]
    # Noise stream: SRM residuals -> conv1/pool1 -> blocks 1-3 (always built;
    # `if True` kept as a toggle).
    if True:
      with tf.variable_scope('noise'):
        #kernel = tf.get_variable('weights',
                              #shape=[5, 5, 3, 3],
                              #initializer=tf.constant_initializer(c))
        conv = tf.nn.conv2d(self.noise, Wcnn, [1, 1, 1, 1], padding='SAME',name='srm')
      self._layers['noise']=conv
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        #srm_conv = tf.nn.tanh(conv, name='tanh')
        noise_net = resnet_utils.conv2d_same(conv, 64, 7, stride=2, scope='conv1')
        noise_net = tf.pad(noise_net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        noise_net = slim.max_pool2d(noise_net, [3, 3], stride=2, padding='VALID', scope='pool1')
        #net_sum=tf.concat(3,[net_conv4,noise_net])
        noise_conv4, _ = resnet_v1.resnet_v1(noise_net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope='noise')
    # Region Proposal Network on the RGB conv4 feature map.
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()

      # rpn
      rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._act_summaries.append(rpn)
      rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
      rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      # Training: sample proposals against ground truth; testing: NMS or top-K.
      if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a determinestic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        if cfg.TEST.MODE == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError
      # rcnn
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        self._layers['pool5']=pool5
        #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
      else:
        raise NotImplementedError
    # Pool the same ROIs from the noise stream and run its block4 head.
    if True:
      noise_pool5 = self._crop_pool_layer(noise_conv4, rois, "noise_pool5")
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        noise_fc7, _ = resnet_v1.resnet_v1(noise_pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope='noise')
    # RGB stream block4 head on the pooled ROI features.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, _ = resnet_v1.resnet_v1(pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope=self._resnet_scope)
    self._layers['fc7']=fc7
    # Fuse RGB and noise ROI features by compact bilinear pooling, then
    # signed-sqrt + L2 normalize before classification. bbox regression is
    # computed from the RGB stream only.
    with tf.variable_scope('noise_pred'):

      bilinear_pool=compact_bilinear_pooling_layer(fc7,noise_fc7,2048*8,compute_size=16,sequential=False)
      fc7=tf.Print(fc7,[tf.shape(fc7)],message='Value of %s' % 'fc', summarize=4, first_n=1)
      bilinear_pool=tf.reshape(bilinear_pool, [-1,2048*8])
      bilinear_pool=tf.Print(bilinear_pool,[tf.shape(bilinear_pool)],message='Value of %s' % 'Blinear', summarize=4, first_n=1)
      # Signed square-root then L2 normalization of the bilinear feature.
      bilinear_pool=tf.multiply(tf.sign(bilinear_pool),tf.sqrt(tf.abs(bilinear_pool)+1e-12))
      bilinear_pool=tf.nn.l2_normalize(bilinear_pool,dim=1)
      noise_cls_score = slim.fully_connected(bilinear_pool, self._num_classes, weights_initializer=tf.contrib.layers.xavier_initializer(),
                                       trainable=is_training, activation_fn=None, scope='cls_score')
      cls_prob = self._softmax_layer(noise_cls_score, "cls_prob")
      # Global average pooling over spatial dims for the bbox head.
      fc7 = tf.reduce_mean(fc7, axis=[1, 2])




      bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')

    # Expose all intermediate predictions; note "cls_score" maps to the
    # fused-stream score here.
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = noise_cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob,bbox_pred
    def build_network(self, sess, is_training=True):
        """Build a Faster R-CNN graph on a ResNet-v1 backbone.

        Assembles the shared convolutional head (block list chosen by
        ``self._num_layers``), the RPN, the proposal / target layers, and the
        RCNN classification and bbox-regression heads, registering every
        output tensor in ``self._predictions``.

        Args:
            sess: TensorFlow session; not used in this method but kept for
                interface compatibility with callers.
            is_training: when True, builds the training branch (anchor and
                proposal target layers); otherwise the test-time proposal
                path selected by ``cfg.TEST.MODE`` ('nms' or 'top').

        Returns:
            Tuple ``(rois, cls_prob, bbox_pred)``.

        Raises:
            NotImplementedError: for unsupported ``self._num_layers``,
                ``cfg.TEST.MODE`` or ``cfg.POOLING_MODE`` values.
        """
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)
        bottleneck = resnet_v1.bottleneck
        # choose different blocks for different number of layers
        if self._num_layers == 50:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 101:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        elif self._num_layers == 152:
            blocks = [
                resnet_utils.Block('block1', bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 1)] * 7 + [(512, 128, 2)]),
                # Use stride-1 for the last conv4 layer
                resnet_utils.Block('block3', bottleneck,
                                   [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
            ]
        else:
            # other numbers are not supported
            raise NotImplementedError

        # Freeze the first cfg.RESNET.FIXED_BLOCKS blocks (run them with
        # is_training=False so their batch-norm/weights stay fixed); the
        # remaining blocks up to conv4 are trainable.
        assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        if cfg.RESNET.FIXED_BLOCKS == 3:
            # All blocks before conv5 are frozen.
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[0:cfg.RESNET.FIXED_BLOCKS],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        elif cfg.RESNET.FIXED_BLOCKS > 0:
            # Mixed: frozen prefix, trainable remainder (up to conv4).
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = self.build_base()
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=self._resnet_scope)

            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv4, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope=self._resnet_scope)
        else:  # cfg.RESNET.FIXED_BLOCKS == 0
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net = self.build_base()
                net_conv4, _ = resnet_v1.resnet_v1(net,
                                                   blocks[0:-1],
                                                   global_pool=False,
                                                   include_root_block=False,
                                                   scope=self._resnet_scope)

        self._act_summaries.append(net_conv4)
        self._layers['head'] = net_conv4
        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # build the anchors for the image
            self._anchor_component()

            # rpn
            rpn = slim.conv2d(net_conv4,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois")
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
                # Try to have a determinestic order for the computing graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError

            # rcnn
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
            else:
                raise NotImplementedError

        # conv5 (the last ResNet block) runs on the pooled RoI features.
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, _ = resnet_v1.resnet_v1(pool5,
                                         blocks[-1:],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)

        with tf.variable_scope(self._resnet_scope, self._resnet_scope):
            # Average pooling done by reduce_mean
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            cls_score = slim.fully_connected(fc7,
                                             self._num_classes,
                                             weights_initializer=initializer,
                                             trainable=is_training,
                                             activation_fn=None,
                                             scope='cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')
        # Register every head output for loss computation and summaries.
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
Beispiel #22
0
    def __init__(self, model_name):
        """Set up placeholders, class-count configuration and ResNet-50
        block definitions for a human-object-interaction network whose
        variant is selected by substrings of *model_name*."""
        self.model_name = model_name
        # Containers populated while the graph is built (summaries, losses, outputs).
        self.visualize = {}
        self.test_visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        # Input placeholders: a single image, spatial configuration maps,
        # and human / object boxes in [batch_idx, x1, y1, x2, y2] format.
        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp')
        # self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes')
        # Ground-truth labels and masks for the H / HO / sp streams (24 verbs).
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 24], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 24], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 24], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 24], name = 'sp_mask')
        self.gt_compose  = tf.placeholder(tf.float32, shape=[None, 222], name='gt_compose')
        self.gt_obj = tf.placeholder(tf.float32, shape=[None, 80], name='gt_obj')
        self.H_num       = tf.placeholder(tf.int32)
        self.image_id = tf.placeholder(tf.int32)
        # Model-variant switches encoded in the model name.
        self.num_classes = 24
        if self.model_name.__contains__('_t4_'):
            self.num_classes = 222
        if self.model_name.__contains__('_t5_'):
            self.verb_num_classes = 21
            self.num_classes = 222
        self.num_fc      = 1024
        # NOTE(review): this unconditionally overwrites the
        # verb_num_classes = 21 set above for '_t5_' models — confirm
        # whether that is intended.
        self.verb_num_classes = 24
        self.obj_num_classes = 80
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        # self.lr          = tf.placeholder(tf.float32)
        # ResNet-50 block spec; the API differs between tf 1.1.0 and later 1.x.
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]
            if self.model_name.__contains__('unique_weights'):
                print("unique_weights2")
                self.blocks.append(resnet_v1_block('block6', base_depth=512, num_units=3, stride=1))

        # Per-class loss weights for the 24 verb classes.
        # remove 3, 17 22, 23 27
        self.HO_weight = np.array([3.3510249, 3.4552405, 4.0257854, 4.088436,
                                   3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237,
                                   2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892,
                                   3.9706533, 4.504736, 1.4873443, 3.700363,
                                   4.1058283, 3.6298118, 5.0808263,
                                   1.520838, 3.3888445, 3.9899964], dtype='float32').reshape(1, 24)
        self.H_weight = np.array([4.0984106, 4.102459, 4.0414762, 4.0414762,
                                  3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364,
                                  2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065,
                                  4.534647, 3.4699364, 1.8585607, 3.9433942,
                                  3.9433942, 4.3523254, 5.138182,
                                  1.7807873, 4.080392, 4.5761204], dtype='float32').reshape(1, 24)
        self.reset_classes()
Beispiel #23
0
    def __init__(self):
        """Set up placeholders and ResNet-50 block definitions for an
        HOI-interactiveness network that consumes human/object boxes plus
        ten body-part boxes per human."""
        # Containers populated while the graph is built.
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        # Input placeholders: one image, a spatial-configuration map, and
        # boxes in [batch_idx, x1, y1, x2, y2] format.
        self.image = tf.placeholder(tf.float32,
                                    shape=[1, None, None, 3],
                                    name='image')
        self.spatial = tf.placeholder(tf.float32,
                                      shape=[None, 64, 64, 3],
                                      name='sp')
        self.H_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='H_boxes')
        self.O_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='O_boxes')
        # Ten body-part boxes (Part0..Part9), one placeholder each.
        self.Part0 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part0_boxes')
        self.Part1 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part1_boxes')
        self.Part2 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part2_boxes')
        self.Part3 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part3_boxes')
        self.Part4 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part4_boxes')
        self.Part5 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part5_boxes')
        self.Part6 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part6_boxes')
        self.Part7 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part7_boxes')
        self.Part8 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part8_boxes')
        self.Part9 = tf.placeholder(tf.float32,
                                    shape=[None, 5],
                                    name='Part9_boxes')
        self.gt_binary_label = tf.placeholder(tf.float32,
                                              shape=[None, 2],
                                              name='gt_binary_label')
        # NOTE(review): this placeholder reuses the name 'gt_binary_label'
        # (TF will uniquify it to 'gt_binary_label_1'); probably intended
        # to be 'gt_binary_label_10v' — confirm before feeding by name.
        self.gt_binary_label_10v = tf.placeholder(tf.float32,
                                                  shape=[None, 10, 2],
                                                  name='gt_binary_label')

        self.num_vec = 10
        self.H_num = tf.placeholder(tf.int32)
        # Class weights for the binary (interactiveness) loss.
        self.binary_weight = np.array(
            [1.6094379124341003, 0.22314355131420976],
            dtype='float32').reshape(1, 2)
        self.num_classes = 600  # HOI
        self.num_binary = 2  # existence (0 or 1) of HOI
        self.num_fc = 1024
        self.scope = 'resnet_v1_50'
        self.stride = [
            16,
        ]
        self.lr = tf.placeholder(tf.float32)
        # ResNet-50 block spec; the API differs between tf 1.1.0 and later 1.x.
        if tf.__version__ == '1.1.0':
            self.blocks = [
                resnet_utils.Block('block1', resnet_v1.bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', resnet_v1.bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                resnet_utils.Block('block3', resnet_v1.bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3),
                resnet_utils.Block('block5', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3)
            ]
        else:  # we use tf 1.2.0 here, Resnet-50
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                # a resnet_v1 bottleneck block
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=6,
                                stride=1),  # feature former
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1),
                resnet_v1_block('block5',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]
Beispiel #24
0
    def _decide_blocks(self):
        """Populate ``self._blocks`` with the ResNet block specification for
        ``self._num_layers`` (50, 101 or 152).

        The last conv4 block always uses stride 1 so RoI pooling operates on
        a finer feature map.  For tf 1.1.0 the older ``resnet_utils.Block``
        API is used; otherwise ``resnet_v1_block``.

        Raises:
            NotImplementedError: for unsupported layer counts.
        """
        # choose different blocks for different number of layers
        if self._num_layers == 50:
            if tf.__version__ == '1.1.0':
                self._blocks = [
                    resnet_utils.Block('block1', resnet_v1.bottleneck,
                                       [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                    resnet_utils.Block('block2', resnet_v1.bottleneck,
                                       [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                    resnet_utils.Block('block3', resnet_v1.bottleneck,
                                       [(1024, 256, 1)] * 5 +
                                       [(1024, 256, 1)]),
                    resnet_utils.Block('block4', resnet_v1.bottleneck,
                                       [(2048, 512, 1)] * 3)
                ]
            else:
                from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
                self._blocks = [
                    resnet_v1_block('block1',
                                    base_depth=64,
                                    num_units=3,
                                    stride=2),
                    resnet_v1_block('block2',
                                    base_depth=128,
                                    num_units=4,
                                    stride=2),
                    resnet_v1_block('block3',
                                    base_depth=256,
                                    num_units=6,
                                    stride=1),
                    resnet_v1_block('block4',
                                    base_depth=512,
                                    num_units=3,
                                    stride=1)
                ]

        elif self._num_layers == 101:
            # Bug fix: resnet_v1_block was previously imported only inside
            # the 50-layer branch, so reaching this branch raised NameError.
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self._blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=23,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        elif self._num_layers == 152:
            # Same fix as the 101-layer branch: make the import local here.
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self._blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=8,
                                stride=2),
                # use stride 1 for the last conv4 layer
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=36,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        else:
            # other numbers are not supported
            raise NotImplementedError
Beispiel #25
0
    def build_network(self, sess, is_training=True):
        """Build a Faster R-CNN graph on a fixed ResNet-101 backbone.

        Unlike the sibling variant, this version hard-codes the 101-layer
        block list and scope name 'resnet_v1_101', applies an L2 weight
        regularizer on the head variable scopes, and sizes the RPN outputs
        by ``self._num_scales * 6`` (3 aspect ratios x 2 per anchor score,
        presumably — TODO confirm against the anchor component).

        Args:
            sess: TensorFlow session; not used in this method but kept for
                interface compatibility with callers.
            is_training: when True, builds the training branch (anchor and
                proposal target layers); otherwise the test-time proposal
                path selected by ``cfg.TEST.MODE``.

        Returns:
            Tuple ``(rois, cls_prob, bbox_pred)``.

        Raises:
            NotImplementedError: for unsupported ``cfg.TEST.MODE`` or
                ``cfg.POOLING_MODE`` values.
        """
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.001)
        bottleneck = resnet_v1.bottleneck
        # ResNet-101 block spec; stride 1 on the last conv4 block.
        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
        # Freeze the first cfg.RESNET.FIXED_BLOCKS blocks; train the rest
        # up to conv5 (blocks[:-1] here ends at what the code calls conv5).
        if cfg.RESNET.FIXED_BLOCKS > 0:
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net, _ = resnet_v1.resnet_v1(self._image,
                                             blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                             global_pool=False,
                                             include_root_block=True,
                                             scope='resnet_v1_101')
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv5, _ = resnet_v1.resnet_v1(
                    net,
                    blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                    global_pool=False,
                    include_root_block=False,
                    scope='resnet_v1_101')
        else:
            with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
                net_conv5, _ = resnet_v1.resnet_v1(self._image,
                                                   blocks[0:-1],
                                                   global_pool=False,
                                                   include_root_block=True,
                                                   scope='resnet_v1_101')

        self._act_summaries.append(net_conv5)
        self._layers['conv5_3'] = net_conv5
        with tf.variable_scope('resnet_v1_101',
                               'resnet_v1_101',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            # build the anchors for the image
            self._anchor_component()

            # rpn
            # rpn = self._conv_layer_shape(net, [3, 3], 512, "rpn_conv/3x3", initializer, train)
            if cfg.TRAIN.BIAS_DECAY:
                biases_regularizer = None
            else:
                biases_regularizer = tf.no_regularizer
            rpn = slim.conv2d(net_conv5,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              biases_regularizer=biases_regularizer,
                              biases_initializer=tf.constant_initializer(0.0),
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(
                rpn,
                self._num_scales * 6, [1, 1],
                trainable=is_training,
                weights_initializer=initializer,
                biases_regularizer=biases_regularizer,
                biases_initializer=tf.constant_initializer(0.0),
                padding='VALID',
                activation_fn=None,
                scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_scales * 6,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(
                rpn,
                self._num_scales * 12, [1, 1],
                trainable=is_training,
                weights_initializer=initializer,
                biases_regularizer=biases_regularizer,
                biases_initializer=tf.constant_initializer(0.0),
                padding='VALID',
                activation_fn=None,
                scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois")
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
                # Try to have a determinestic order for the computing graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError

            # rcnn
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net_conv5, rois, "pool5")
            else:
                raise NotImplementedError

        # The last ResNet block runs on the pooled RoI features.
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            fc7, _ = resnet_v1.resnet_v1(pool5,
                                         blocks[-1:],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope='resnet_v1_101')
        with tf.variable_scope('resnet_v1_101',
                               'resnet_v1_101',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            # Average pooling done by reduce_mean
            fc7 = tf.reduce_mean(fc7, axis=[1, 2])
            cls_score = slim.fully_connected(
                fc7,
                self._num_classes,
                weights_initializer=initializer,
                trainable=is_training,
                biases_regularizer=biases_regularizer,
                activation_fn=None,
                scope='cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                biases_regularizer=biases_regularizer,
                activation_fn=None,
                scope='bbox_pred')
        # Register every head output for loss computation and summaries.
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
Example #26
0
    def __init__(self,
                 num_sgnodes=17,
                 num_agnodes=6 + 17,
                 is_training=True,
                 use_skebox=False,
                 use_bodypart=False,
                 use_pm=False,
                 use_u=False,
                 use_sg=False,
                 use_sg_att=False,
                 use_ag=False,
                 use_ag_att=False,
                 use_binary=False,
                 use_Hsolo=False):
        """Set up placeholders, ResNet-50 block specs and graph (GCN) state.

        Builds no forward computation; it only creates the input placeholders,
        loss/prediction bookkeeping dicts, the ResNet-50 block definitions
        (version-dependent on tf 1.1.0 vs newer contrib slim), and the
        adjacency tensors for the spatial and appearance graphs.

        Args:
            num_sgnodes: node count of the spatial graph (default 17,
                presumably one per human keypoint — confirm with Graph()).
            num_agnodes: node count of the appearance graph
                (default 6 + 17; presumably body parts + keypoints).
            is_training: training-mode flag; selects dropout keep_prob.
            use_*: architecture switches; see the inline comments below.
        """
        # Control the network architecture
        self.use_skebox = use_skebox  # whether use skeleton box
        self.use_bp = use_bodypart  # whether use body part
        self.use_pm = use_pm  # whether use pose map
        self.use_u = use_u  # whether use union box
        self.use_sg = use_sg  # whether use spatial graph
        self.use_sg_att = use_sg_att  # whether use spatial graph attention
        self.use_ag = use_ag  # whether use appearance graph
        self.use_ag_att = use_ag_att  # whether use appearance graph attention
        self.use_binary = use_binary  # whether train binary module
        self.use_Hsolo = use_Hsolo  # whether use human-only branch — presumed from name; confirm

        # Annotation feed
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 1], name='gt_binary_label')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 600], name='gt_class_HO')

        # Training utils
        self.predictions = {}
        self.losses = {}
        self.lr = tf.placeholder(tf.float32)
        self.num_binary = 1  # existence of HOI (0 or 1)
        self.num_classes = 600  # number of HOI categories
        self.is_training = is_training
        self.keep_prob = 0.8 if self.is_training else 1  # dropout keep prob; 1 disables dropout at test time

        # Training data feed (boxes are 5-dim: presumably [batch_idx, x1, y1, x2, y2] — confirm)
        self.image = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='image')
        self.head = tf.placeholder(tf.float32, shape=[1, None, None, 1024], name='head')
        self.H_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='H_boxes')
        self.O_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='O_boxes')
        self.U_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='U_boxes')
        self.skeboxes = tf.placeholder(tf.float32, shape=[None, 17, 5], name='part_boxes')
        self.bodyparts = tf.placeholder(tf.float32, shape=[None, 6, 5], name='bodypart_boxes')
        self.spatial = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name='sp')
        self.semantic = tf.placeholder(tf.float32, shape=[None, 768], name='semantic')
        self.H_num = tf.placeholder(tf.int32)
        self.H_num_neg = tf.placeholder(tf.int32)

        # ResNet 50 Network
        self.scope = 'resnet_v1_50'
        self.num_fc = 1024
        self.num_fc2 = 1024
        self.stride = [16, ]
        # tf 1.1.0 exposes only resnet_utils.Block; newer versions ship resnet_v1_block.
        # Both branches describe the same 5-block ResNet-50 variant (extra 'block5' tail).
        if tf.__version__ == '1.1.0':
            self.blocks = [resnet_utils.Block('block1', resnet_v1.bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                           resnet_utils.Block('block2', resnet_v1.bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                           resnet_utils.Block('block3', resnet_v1.bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                           resnet_utils.Block('block4', resnet_v1.bottleneck, [(2048, 512, 1)] * 3),
                           resnet_utils.Block('block5', resnet_v1.bottleneck, [(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]

        # GCN setting
        # Spatial GCN
        self.num_SGnodes = num_sgnodes
        self.SGraph = Graph(num_node=self.num_SGnodes)
        self.ori_As = tf.convert_to_tensor(self.SGraph.ori_A.astype(np.float32))
        self.As = tf.convert_to_tensor(self.SGraph.A.astype(np.float32))  # list for partition, N, V, V
        self.spatial_kernel_size = self.As.shape[0]
        self.SGinput = tf.placeholder(tf.float32, shape=[None, self.num_SGnodes, 1, 5, 1],
                                      name='Gnodes')  # [N, V, T, C, M] = [N, 17, 1, 5, 1] with default num_sgnodes

        # Appearance GCN
        self.num_AGnodes = num_agnodes
        self.AGraph = Graph(num_node=self.num_AGnodes)
        self.ori_Aa = tf.convert_to_tensor(self.AGraph.ori_A.astype(np.float32))
        self.Aa = tf.convert_to_tensor(self.AGraph.A.astype(np.float32))  # list for partition, N, V, V

        self.binary_type = 0
  def build_network(self, sess, is_training=True):
    """Build the full detection graph.

    Pipeline: a constrained 'noise' conv over ``self.noise``, ResNet stages
    C_1..C_4, an RPN whose features pass through a CBAM attention module,
    RoI cropping, a ResNet tail (fc7) feeding classification / bbox heads,
    and — when ``cfg.USE_MASK`` — an extra mask-prediction branch.

    Args:
      sess: TF session handle; not used during graph construction (kept for
        caller interface compatibility).
      is_training: if True, builds anchor/proposal target layers and marks
        conv/fc weights trainable.

    Returns:
      (rois, cls_prob, bbox_pred, mask_predictions) when ``cfg.USE_MASK``
      is enabled, otherwise (rois, cls_prob, bbox_pred).
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError
    # Constrained conv over the noise input (3 output channels, linear).
    with tf.variable_scope('noise'):

      conv=slim.conv2d(self.noise, num_outputs=3, kernel_size=[5,5], stride=1 , padding='SAME', activation_fn=None, trainable=is_training, scope='constrained_conv')



    self._layers['noise']=conv
    # ResNet stages; each stage runs in its own slim arg_scope.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):

    #assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
        C_1 = self.build_base(conv)

        C_2, _ = resnet_v1.resnet_v1(C_1,
                                     blocks[0:1],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    #net=self.cbam_module(inputs=net,name="cbam_layer_1")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_3, _ = resnet_v1.resnet_v1(C_2,
                                     blocks[1:2],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    #net = self.cbam_module(inputs=net, name="cbam_layer_2")
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C_4, end_point = resnet_v1.resnet_v1(C_3,
                                     blocks[2:3],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    # mask_C_4 = self.cbam_module(inputs=C_4, name="C_4")
    self.end_point=end_point
    self._act_summaries.append(C_4)
    self._layers['head'] = C_4
    self._layers['C1'] = C_1
    self._layers['C2'] = C_2
    self._layers['C3'] = C_3






    # Region Proposal Network on top of C_4.
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()

      # rpn
      rpn1 = slim.conv2d(C_4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._layers['rpn1'] = rpn1
      # CBAM attention over the RPN features.
      rpn = self.cbam_module(inputs=rpn1, name="rpn_conv1")
      self._layers['rpn'] = rpn



      self._act_summaries.append(rpn)
      rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      self._layers['rpn_cls_score'] = rpn_cls_score



      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')


      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")


      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")

      rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')

      if is_training:
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        if cfg.TEST.MODE == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError
      # rcnn
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(C_4, rois, "pool5")
        #pool5 = self._crop_pool_layer(net_sum, rois, "pool5")
      else:
        raise NotImplementedError


    # ResNet tail (block4) over the pooled RoI features.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      fc7, end_point1 = resnet_v1.resnet_v1(pool5,
                                   blocks[-1:],
                                   global_pool=False,
                                   include_root_block=False,
                                   scope=self._resnet_scope)



    self._layers['fc7']=fc7
    # self._layers['pool5'] =pool5
    self.end_point1=end_point1


    with tf.variable_scope(self._resnet_scope, self._resnet_scope):

      # Average pooling done by reduce_mean
      cls_fc7 = tf.reduce_mean(fc7, axis=[1, 2])

      cls_score = slim.fully_connected(cls_fc7, self._num_classes, weights_initializer=initializer,
                                       trainable=is_training, activation_fn=None, scope='cls_score')


      cls_prob = self._softmax_layer(cls_score, "cls_prob")

      # NOTE(review): identical to cls_fc7 above; kept as a separate tensor
      # feeding the bbox head.
      box_fc7=tf.reduce_mean(fc7, axis=[1, 2])
      bbox_pred = slim.fully_connected(box_fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')

    # Optional mask branch.
    # NOTE(review): 'is True' requires cfg.USE_MASK to be the literal True
    # singleton; a plain truthiness test would be safer — confirm cfg type.
    if cfg.USE_MASK is True:
      with tf.variable_scope('feature_fuse', 'feature_fuse'):
            # Fuse backbone (C_3) and attended RPN features 50/50.
            mask_fuse = C_3 * 0.5 + rpn * 0.5

            feature_fuse = slim.conv2d(mask_fuse, 1024, [1, 1], padding='VALID', trainable=is_training,
                                       weights_initializer=initializer, scope='mask_fuse')
      mask_box, indices = self._proposal_mask_layer(cls_prob, bbox_pred, rois, 'mask_proposal')
      mask_pool5 = self._crop_pool_layer(feature_fuse, mask_box, "mask_pool5")

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
          mask_fc7, _ = resnet_v1.resnet_v1(mask_pool5,
                                            blocks[-1:],
                                            global_pool=False,
                                            include_root_block=False,
                                            scope='mask_conv')

      self._act_summaries.append(mask_fc7)

      with tf.variable_scope('mask_predict', 'mask_predict'):

          # Upsample, then conv + BN + ReLU, then a 2-channel (bg/fg) predictor.
          upsampled_features=slim.conv2d_transpose(mask_fc7,256,2,2,activation_fn=None)
          self._act_summaries.append(upsampled_features)
          upsampled_features = slim.conv2d(upsampled_features, 64, [1, 1], normalizer_fn=slim.batch_norm, activation_fn=None,padding='VALID')
          self._act_summaries.append(upsampled_features)
          upsampled_features = slim.batch_norm(upsampled_features, activation_fn=None)
          self._act_summaries.append(upsampled_features)
          upsampled_features = tf.nn.relu(upsampled_features)
          self._act_summaries.append(upsampled_features)

          mask_predictions = slim.conv2d(upsampled_features, num_outputs=2,activation_fn=None,
                                         kernel_size=[1, 1],padding='VALID')
          self._act_summaries.append(mask_predictions)

      # Channel 1 = foreground logits; softmax gives bg/fg probabilities.
      self._predictions["mask_out"] = tf.expand_dims(mask_predictions[:, :, :, 1], 3)
      mask_softmax=tf.nn.softmax(mask_predictions)


      self._predictions["mask_softmaxbg"] = tf.expand_dims(mask_softmax[:, :, :, 0], 3)
      self._predictions["mask_softmaxfg"] = tf.expand_dims(mask_softmax[:, :, :, 1], 3)

      self._predictions["rpn_cls_score"] = rpn_cls_score
      self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
      self._predictions["rpn_cls_prob"] = rpn_cls_prob
      self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
      self._predictions["cls_score"] = cls_score
      self._predictions["cls_prob"] = cls_prob
      self._predictions["bbox_pred"] = bbox_pred
      self._predictions["rois"] = rois
      self._predictions["mask_pred"] = mask_predictions

      self._score_summaries.update(self._predictions)

      return rois, cls_prob, bbox_pred, mask_predictions
    else:
      self._predictions["rpn_cls_score"] = rpn_cls_score
      self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
      self._predictions["rpn_cls_prob"] = rpn_cls_prob
      self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
      self._predictions["cls_score"] = cls_score
      self._predictions["cls_prob"] = cls_prob
      self._predictions["bbox_pred"] = bbox_pred
      self._predictions["rois"] = rois

      self._score_summaries.update(self._predictions)

      return rois, cls_prob, bbox_pred
 def __init__(self):
     """Set up placeholders, bookkeeping dicts and ResNet-50 block specs for
     a PaSta (part-state) based verb-recognition network.

     No forward graph is built here; only input placeholders, class-count
     constants for the six PaSta heads, and the version-dependent ResNet-50
     block definitions.
     """
     self.visualize = {}
     self.intermediate = {}
     self.predictions = {}
     self.score_summaries = {}
     self.event_summaries = {}
     self.train_summaries = []
     self.losses = {}
     self.image = tf.placeholder(tf.float32,
                                 shape=[1, None, None, 3],
                                 name='image')  #scene stream
     self.H_boxes = tf.placeholder(tf.float32,
                                   shape=[None, 5],
                                   name='H_boxes')  # Human stream
     self.P_boxes = tf.placeholder(tf.float32,
                                   shape=[None, 10, 5],
                                   name='P_boxes')  # PaSta stream
     self.gt_verb = tf.placeholder(tf.float32,
                                   shape=[None, 80],
                                   name='gt_class_verb')  # target verb
     self.H_num = tf.placeholder(tf.int32)
     # Per-verb loss weights, broadcast as a (1, 80) row vector.
     self.verb_weight = np.array(verb80, dtype='float32').reshape(1, -1)
     self.num_classes = 80  # HOI
     # Class counts for the six part-state (PaSta) classification heads.
     self.num_pasta0 = 12  # pasta0 ankle
     self.num_pasta1 = 10  # pasta1 knee
     self.num_pasta2 = 5  # pasta2 hip
     self.num_pasta3 = 31  # pasta3 hand
     self.num_pasta4 = 5  # pasta4 shoulder
     self.num_pasta5 = 13  # pasta5 head
     self.num_fc = 1024
     self.scope = 'resnet_v1_50'
     self.stride = [
         16,
     ]
     self.lr = tf.placeholder(tf.float32)
     # tf 1.1.0 only has resnet_utils.Block; newer versions ship
     # resnet_v1_block. Both describe the same 5-block ResNet-50 variant.
     if tf.__version__ == '1.1.0':
         self.blocks = [
             resnet_utils.Block('block1', resnet_v1.bottleneck,
                                [(256, 64, 1)] * 2 + [(256, 64, 2)]),
             resnet_utils.Block('block2', resnet_v1.bottleneck,
                                [(512, 128, 1)] * 3 + [(512, 128, 2)]),
             resnet_utils.Block('block3', resnet_v1.bottleneck,
                                [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
             resnet_utils.Block('block4', resnet_v1.bottleneck,
                                [(2048, 512, 1)] * 3),
             resnet_utils.Block('block5', resnet_v1.bottleneck,
                                [(2048, 512, 1)] * 3)
         ]
     else:  # we use tf 1.2.0 here, Resnet-50
         from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
         self.blocks = [
             resnet_v1_block('block1', base_depth=64, num_units=3,
                             stride=2),  # a resnet_v1 bottleneck block
             resnet_v1_block('block2',
                             base_depth=128,
                             num_units=4,
                             stride=2),
             resnet_v1_block('block3',
                             base_depth=256,
                             num_units=6,
                             stride=1),  # feature former
             resnet_v1_block('block4',
                             base_depth=512,
                             num_units=3,
                             stride=1),
             resnet_v1_block('block5',
                             base_depth=512,
                             num_units=3,
                             stride=1)
         ]
Example #29
0
    def __init__(self,
                 in_channels=3,
                 num_classes=0,
                 num_nodes=17 + 2,
                 edge_importance_weighting=True,
                 is_training=True,
                 num_fc=1024,
                 posetype=1,
                 bi_posegraph=False,
                 bodypart=False,
                 binary=False,
                 posemap=False,
                 posegraph=False,
                 semantic=False,
                 data_bn=True):
        """Set up placeholders, ResNet-50 block specs and ST-GCN state for an
        HOI network with a pose-graph branch.

        No forward graph is built here. Note that ``self.num_classes`` is
        hard-coded to 600 regardless of the ``num_classes`` argument, and
        ``edge_importance_weighting`` is not stored here — presumably consumed
        by the ST-GCN construction elsewhere; confirm with the rest of the
        class.

        Args:
            in_channels: per-node input channel count for the graph nodes.
            num_nodes: graph node count (17 keypoints + 2 extra nodes —
                presumed from the default; confirm).
            is_training: training-mode flag; selects dropout keep probs.
            posetype: 1 selects a 2-channel spatial map, otherwise 3-channel.
            bi_posegraph / bodypart / binary / posemap / posegraph / semantic:
                architecture switches stored verbatim.
            data_bn: whether the ST-GCN applies input batch norm.
        """
        self.predictions = {}
        self.train_summaries = []
        self.losses = {}
        self.lr = tf.placeholder(tf.float32)
        self.num_binary = 1  # existence of HOI (0 or 1)
        self.num_classes = 600  # number of HOI categories (overrides the arg)
        self.gt_binary_label = tf.placeholder(tf.float32,
                                              shape=[None, 1],
                                              name='gt_binary_label')
        self.gt_class_HO = tf.placeholder(tf.float32,
                                          shape=[None, 600],
                                          name='gt_class_HO')
        self.is_training = is_training
        # Dropout keep probabilities; 1 disables dropout at test time.
        if self.is_training:
            self.keep_prob = cfg.TRAIN_DROP_OUT_BINARY
            self.keep_prob_tail = .5
        else:
            self.keep_prob = 1
            self.keep_prob_tail = 1

        # Input placeholders (boxes are 5-dim: presumably
        # [batch_idx, x1, y1, x2, y2] — confirm with the data layer).
        self.image = tf.placeholder(tf.float32,
                                    shape=[1, None, None, 3],
                                    name='image')
        self.head = tf.placeholder(tf.float32,
                                   shape=[1, None, None, 1024],
                                   name='head')
        self.H_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='H_boxes')
        self.O_boxes = tf.placeholder(tf.float32,
                                      shape=[None, 5],
                                      name='O_boxes')
        self.partboxes = tf.placeholder(tf.float32,
                                        shape=[None, 17, 5],
                                        name='part_boxes')
        self.semantic = tf.placeholder(tf.float32,
                                       shape=[None, 1024],
                                       name='semantic_feat')
        self.H_num = tf.placeholder(tf.int32)

        # Control the network architecture
        self.bodypart = bodypart
        self.binary = binary
        self.posemap = posemap
        self.posegraph = posegraph
        self.bi_posegraph = bi_posegraph
        self.posetype = posetype
        self.semantic_flag = semantic
        # posetype selects the channel count of the spatial configuration map.
        if self.posetype == 1:
            self.spatial = tf.placeholder(tf.float32,
                                          shape=[None, 64, 64, 2],
                                          name='sp')
        else:
            self.spatial = tf.placeholder(tf.float32,
                                          shape=[None, 64, 64, 3],
                                          name='sp')

        # ResNet 50 Network
        self.scope = 'resnet_v1_50'
        self.num_fc = 1024
        self.num_fc2 = num_fc
        self.stride = [
            16,
        ]
        # tf 1.1.0 only has resnet_utils.Block; newer versions ship
        # resnet_v1_block. Both describe the same 5-block ResNet-50 variant.
        if tf.__version__ == '1.1.0':
            self.blocks = [
                resnet_utils.Block('block1', resnet_v1.bottleneck,
                                   [(256, 64, 1)] * 2 + [(256, 64, 2)]),
                resnet_utils.Block('block2', resnet_v1.bottleneck,
                                   [(512, 128, 1)] * 3 + [(512, 128, 2)]),
                resnet_utils.Block('block3', resnet_v1.bottleneck,
                                   [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                resnet_utils.Block('block4', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3),
                resnet_utils.Block('block5', resnet_v1.bottleneck,
                                   [(2048, 512, 1)] * 3)
            ]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [
                resnet_v1_block('block1', base_depth=64, num_units=3,
                                stride=2),
                resnet_v1_block('block2',
                                base_depth=128,
                                num_units=4,
                                stride=2),
                resnet_v1_block('block3',
                                base_depth=256,
                                num_units=6,
                                stride=1),
                resnet_v1_block('block4',
                                base_depth=512,
                                num_units=3,
                                stride=1),
                resnet_v1_block('block5',
                                base_depth=512,
                                num_units=3,
                                stride=1)
            ]

        # GCN setting
        self.num_nodes = num_nodes
        self.c = in_channels
        self.data_bn = data_bn
        self.strategy = 'spatial'
        self.graph = Graph(strategy=self.strategy)
        self.A = tf.convert_to_tensor(self.graph.A.astype(
            np.float32))  # [None, num_nodes, num_nodes]
        self.spatial_kernel_size = self.A.shape[0]
        # [N, C, T, V, M] = [N, 3, 1, 19, 1]
        self.Gnodes = tf.placeholder(
            tf.float32,
            shape=[None, self.c, 1, self.num_nodes, 1],
            name='Gnodes')

        # ST_GCN
        self.depth_st_gcn_networks = 10  # number of stacked ST-GCN layers
  def build_network(self, sess, is_training=True, ver=''):
    # select initializers
    if cfg.FLAGS.initializer:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers

    def resnet_v1_block(scope, bottleneck, base_depth, num_units, stride):
      return resnet_utils.Block(scope, bottleneck, [{
          'depth': base_depth * 4,
          'depth_bottleneck': base_depth,
          'stride': 1
      }] * (num_units - 1) + [{
          'depth': base_depth * 4,
          'depth_bottleneck': base_depth,
          'stride': stride
      }])

    if self._num_layers == 50:
        blocks = [resnet_v1_block('block1'+ver, bottleneck, base_depth=64, num_units=3, stride=2),
                       resnet_v1_block('block2'+ver, bottleneck, base_depth=128, num_units=4, stride=2),
                       # use stride 1 for the last conv4 layer
                       resnet_v1_block('block3'+ver, bottleneck, base_depth=256, num_units=6, stride=1),
                       resnet_v1_block('block4'+ver, bottleneck, base_depth=512, num_units=3, stride=1),
        ]

    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1'+ver, bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2'+ver, bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3'+ver, bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4'+ver, bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1'+ver, bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2'+ver, bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3'+ver, bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4'+ver, bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError

    assert (0 <= cfg.FLAGS.fixed_blocks < 4)
    if cfg.FLAGS.fixed_blocks == 3:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:cfg.FLAGS.fixed_blocks],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
        net_noise = self.build_base(ver='n', init=initializer)
        net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise,
                                           blocks[0:cfg.FLAGS.fixed_blocks],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope, reuse=True)

    elif cfg.FLAGS.fixed_blocks > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net, _ = resnet_v1.resnet_v1(net,
                                     blocks[0:cfg.FLAGS.fixed_blocks],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope, reuse=tf.AUTO_REUSE)

        net_noise = self.build_base(ver='n', init=initializer)
        net_noise, _ = resnet_v1.resnet_v1(net_noise,
                                            blocks[0:cfg.FLAGS.fixed_blocks],
                                            global_pool=False,
                                            include_root_block=False,
                                            scope=self._resnet_scope, reuse=tf.AUTO_REUSE)

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[cfg.FLAGS.fixed_blocks:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope, reuse=tf.AUTO_REUSE)
        net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise,
                                            blocks[0:cfg.FLAGS.fixed_blocks],
                                            global_pool=False,
                                            include_root_block=False,
                                            scope=self._resnet_scope, reuse=tf.AUTO_REUSE)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net = self.build_base()
        net_conv4, _ = resnet_v1.resnet_v1(net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope, reuse=tf.AUTO_REUSE)
        # Second ("noise") stream: build a parallel base from the 'n' view of the
        # input and run it through the same fixed ResNet blocks, sharing weights
        # with the first stream via tf.AUTO_REUSE on the same scope.
        net_noise = self.build_base(ver='n', init=initializer)
        net_conv4_noise, _ = resnet_v1.resnet_v1(net_noise,
                                           blocks[0:cfg.FLAGS.fixed_blocks],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope, reuse=tf.AUTO_REUSE)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # build the anchors for the image
      self._anchor_component()

      # Region Proposal Network (RPN): shared 3x3 conv over the backbone
      # feature map, followed by two sibling 1x1 heads (objectness, box deltas).
      rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope="rpn_conv/3x3")
      self._act_summaries.append(rpn)
      # Objectness logits: 2 channels (bg/fg) per anchor position.
      rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
      # change it so that the score has 2 as its channel size
      rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
      rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
      # Reshape the softmax output back to num_anchors * 2 channels.
      rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
      # Bounding-box regression deltas: 4 values per anchor.
      rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      if is_training:
        # Training path: generate proposals, assign anchor labels, then sample
        # RoI/target pairs for the RCNN head.
        rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
        # Try to have a deterministic order for the computing graph, for reproducibility
        with tf.control_dependencies([rpn_labels]):
          rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
      else:
        # Inference path: select proposals either by NMS or by top-scoring boxes,
        # controlled by cfg.FLAGS.test_mode.
        if cfg.FLAGS.test_mode == 'nms':
          rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.FLAGS.test_mode == 'top':
          rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
          raise NotImplementedError

      # rcnn — only 'crop' pooling is implemented here.
      if cfg.FLAGS.POOLING_MODE == 'crop':
        # Crop RoI features from both streams and fuse them with compact
        # bilinear pooling into a 1024-d descriptor per RoI.
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
        pool5_forNoise = self._crop_pool_layer(net_conv4_noise, rois, "pool5n")
        # Compact Bilinear Pooling
        cbp = compact_bilinear_pooling_layer(pool5, pool5_forNoise, 1024)
        cbp_flat = slim.flatten(cbp, scope='cbp_flatten')
      else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
      # Fully-connected layer over the fused RoI descriptor.
      # NOTE(review): Python name is fc7 but the variable scope is 'fc6' —
      # presumably kept for checkpoint name compatibility; confirm before renaming.
      fc7 = slim.fully_connected(cbp_flat, 4096, scope='fc6')

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # Average pooling done by reduce_mean
      #fc7 = tf.reduce_mean(fc7, axis=[1, 2])
      # Classification head: per-class logits and softmax probabilities.
      cls_score = slim.fully_connected(fc7, self._num_classes, weights_initializer=initializer,
                                       trainable=is_training, activation_fn=None, scope='cls_score')
      cls_prob = self._softmax_layer(cls_score, "cls_prob")
      # Class-specific box regression: 4 deltas per class.
      bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
                                       trainable=is_training,
                                       activation_fn=None, scope='bbox_pred')
    # Expose intermediate tensors for loss computation and summaries.
    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred