def get_logits(self, x):
    """Build the network on input `x` and return the output of the final FC layer.

    The graph consists of the init block, one residual unit per entry of
    ``self.channels`` (a list of per-stage channel lists), a global average
    pooling and a fully-connected layer with ``self.classes`` units.
    Conv / pooling / batch-norm layers are built with
    ``data_format='channels_first'``.
    """
    channels_first_layers = [Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm]
    with argscope(channels_first_layers, data_format='channels_first'):
        x = res_init_block(
            x=x,
            in_channels=self.in_channels,
            out_channels=self.init_block_channels,
            name="features/init_block")
        in_channels = self.init_block_channels

        for stage_no, stage_channels in enumerate(self.channels, start=1):
            for unit_no, out_channels in enumerate(stage_channels, start=1):
                # Stride 2 only on the first unit of every stage after the first one.
                is_downsampling_unit = (unit_no == 1) and (stage_no != 1)
                x = res_unit(
                    "features/stage{}/unit{}".format(stage_no, unit_no),
                    x,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    strides=2 if is_downsampling_unit else 1,
                    bottleneck=self.bottleneck,
                    conv1_stride=self.conv1_stride,
                    use_se=self.use_se)
                in_channels = out_channels

        x = GlobalAvgPooling("features/final_pool", x)
        x = FullyConnected("output", x, units=self.classes)
        return x
def resnet_bottleneck(l, ch_out, stride, stride_first=False):
    """
    Bottleneck block whose 3x3 output is combined with a gated, globally
    pooled branch before the final 1x1 convolution.

    stride_first: original resnet put stride on first conv.
                  fb.resnet.torch put stride on second conv.
    """
    shortcut = l
    if stride_first:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    l = Conv2D('conv1', l, ch_out, 1, strides=stride_1x1, activation=BNReLU)
    l_3x3 = Conv2D('conv2', l, ch_out, 3, strides=stride_3x3, activation=tf.identity)
    static_shape = l_3x3.get_shape().as_list()

    # Global branch: pooled from the conv1 output, then two FC layers.
    l_gap = GlobalAvgPooling('gap', l)
    l_gap = FullyConnected('fc1', l_gap, ch_out, activation=tf.nn.relu)
    l_gap = FullyConnected('fc2', l_gap, ch_out, activation=tf.identity)
    # Tile the per-channel vector over dims 2 and 3 of the 3x3 output.
    # NOTE(review): this presumes a channels-first layout with static
    # spatial dimensions -- confirm against the caller's argscope.
    l_gap = tf.reshape(l_gap, [-1, ch_out, 1, 1])
    l_gap = tf.tile(l_gap, [1, 1, static_shape[2], static_shape[3]])

    # Gate computed from the concatenation of both branches along axis 1.
    gate = tf.concat([l_3x3, l_gap], axis=1)
    gate = Conv2D('conv_c1', gate, ch_out, 1, strides=1, activation=tf.nn.relu)
    gate = Conv2D('conv_c2', gate, ch_out, 1, strides=1, activation=tf.identity)
    gate = tf.sigmoid(gate)

    l = l_3x3 + l_gap * gate
    l = BNReLU('conv2', l)
    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
    projected_shortcut = resnet_shortcut(
        shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
    return l + projected_shortcut
def se_resnet_bottleneck(l, ch_out, stride, rate):
    """Squeeze-and-excitation bottleneck block with dilated convolutions.

    Args:
        l: input tensor.
        ch_out: base channel count; the block outputs ``ch_out * 4`` channels.
        stride: stride of the 3x3 convolution, also forwarded to the shortcut.
        rate: dilation rate passed to every convolution and to the shortcut.

    Returns:
        The SE-scaled residual branch added to the shortcut branch.

    Raises:
        KeyError: if no ``data_format`` is set for ``Conv2D`` in the current
            argscope (the lookup below requires it).
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, dilation_rate=rate, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, stride=stride, dilation_rate=rate, nl=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, dilation_rate=rate, nl=get_bn(zero_init=True))

    squeeze = GlobalAvgPooling('gap', l)
    squeeze = FullyConnected('fc1', squeeze, ch_out // 4, nl=tf.nn.relu)
    squeeze = FullyConnected('fc2', squeeze, ch_out * 4, nl=tf.nn.sigmoid)

    data_format = get_arg_scope()['Conv2D']['data_format']
    # Accept both spellings of the channels-first layout; testing only for
    # 'NCHW' would silently pick the channels-last reshape when the argscope
    # was configured with 'channels_first'.
    ch_ax = 1 if data_format in ('NCHW', 'channels_first') else 3
    shape = [-1, 1, 1, 1]
    shape[ch_ax] = ch_out * 4
    l = l * tf.reshape(squeeze, shape)
    return l + resnet_shortcut(
        shortcut, ch_out * 4, stride, rate, nl=get_bn(zero_init=False))
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a ResNet and return 1000-way logits.

    Sec 5.1: We adopt the initialization of [15] for all convolutional layers.
    TensorFlow does not have the true "MSRA init". We use variance_scaling as an approximation.

    Sec 5.1: The 1000-way fully-connected layer is initialized by
    drawing weights from a zero-mean Gaussian with standard deviation of 0.01.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        for idx, (width, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], stride)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear', l, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet_backbone(images, num_blocks, grp_fun, blck_fun, nfeatures, bn=True):
    """Two-input ResNet-style backbone returning an ``nfeatures``-wide output.

    ``images[0]`` and ``images[1]`` each pass through their own 7x7 stem
    convolution; the two results are concatenated along axis 3 and fed through
    four groups, global average pooling and three fully-connected layers.

    Args:
        images: indexable holding the two input tensors.
        num_blocks: number of blocks for each of the four groups.
        grp_fun: group builder, called as
            ``grp_fun(x, name, blck_fun, width, count, stride, bn=bn)``.
        blck_fun: block builder forwarded to ``grp_fun``.
        nfeatures: number of units of the final 'linear' layer.
        bn: when true the stem convolutions use ``BNReLU``, otherwise plain ReLU.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (256, 1)]

    with argscope(Conv2D, nl=tf.identity, use_bias=False, W_init=conv_init):
        first_input, second_input = images[0], images[1]
        stem_act = BNReLU if bn else tf.nn.relu

        x = Conv2D('conv0', first_input, 64, 7, stride=2, nl=stem_act)
        y = Conv2D('conv1', second_input, 64, 7, stride=2, nl=stem_act)
        # stack second_input into channel-dimension of conv0 output
        x = tf.concat([x, y], axis=3, name='stack_second_input')
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')

        for idx, (width, stride) in enumerate(group_specs):
            x = grp_fun(x, 'group{}'.format(idx), blck_fun, width, num_blocks[idx], stride, bn=bn)

        x = GlobalAvgPooling('gap', x)
        x = FullyConnected('fc0', x, 1000)  # NOTE: are linear activations intended?
        x = FullyConnected('fc1', x, 500)  # NOTE: are linear activations intended?
        # NOTE: looks like Fabi assumed a non-linear activation as the default.
        x = FullyConnected('linear', x, nfeatures, nl=tf.identity)
        # NOTE: the last 3 FC layers are linearly activated (see the graph in
        # TensorBoard), i.e. a single FC layer should suffice (possibly
        # better runtime).
    return x
def se_resnet_bottleneck(option, l, ch_out, stride, adl_index=None):
    """SE bottleneck block followed by ReLU and an optional gating op.

    Args:
        option: object exposing ``gating_position`` (indexed by ``adl_index``)
            and forwarded to ``gating_op``.
        l: input tensor.
        ch_out: base channel count; the block outputs ``ch_out * 4`` channels.
        stride: stride of the 3x3 convolution, also forwarded to the shortcut.
        adl_index: key looked up in ``option.gating_position``.
            NOTE(review): the default ``None`` is used as a key as-is --
            confirm that ``gating_position`` tolerates it.

    Returns:
        The activated block output, gated when the looked-up flag is truthy.
    """
    block_input = l
    out_channels = ch_out * 4

    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = Conv2D('conv3', l, out_channels, 1, activation=get_bn(zero_init=True))

    excite = GlobalAvgPooling('gap', l)
    excite = FullyConnected('fc1', excite, ch_out // 4, activation=tf.nn.relu)
    excite = FullyConnected('fc2', excite, out_channels, activation=tf.nn.sigmoid)

    # Place the channel count on the channel axis so the scale broadcasts.
    if is_data_format_nchw():
        excite_shape = [-1, out_channels, 1, 1]
    else:
        excite_shape = [-1, 1, 1, out_channels]
    l = l * tf.reshape(excite, excite_shape)

    projected = resnet_shortcut(block_input, out_channels, stride, activation=get_bn())
    out = tf.nn.relu(l + projected)
    if option.gating_position[adl_index]:
        out = gating_op(out, option)
    return out
def get_logits(image, num_classes=1000):
    """Build the classifier on `image` and return ``num_classes`` logits.

    The graph is a stem convolution with a concatenated [x, -x] activation,
    two linear bottlenecks, three stages of inception blocks each followed by
    a DropBlock layer, a 1x1 convolution, global pooling and two FC layers.
    """
    # DropBlock keep probability is a non-trainable variable while training
    # and ``None`` otherwise.
    if get_current_tower_context().is_training:
        dropblock_keep_prob = tf.get_variable(
            'dropblock_keep_prob', (), dtype=tf.float32, trainable=False)
    else:
        dropblock_keep_prob = None

    l = image

    # conv1
    l = Conv2D('conv1', l, 16, 4, strides=2, activation=None, padding='SAME')
    with tf.variable_scope('conv1'):
        l = BNReLU(tf.concat([l, -l], axis=-1))
    l = MaxPooling('pool1', l, 2)

    # conv2
    l = LinearBottleneck('conv2', l, 48, 24, 5, t=1, use_ab=True)
    l = l + LinearBottleneck('conv3', l, 24, 24, 5, t=2, use_ab=True)

    # (channels, number of blocks, expansion multiplier, DropBlock size) per stage
    stage_specs = zip([48, 72, 96], [2, 4, 4], [3, 4, 6], [3, 3, 3])
    for stage_idx, (channels, num_units, mult, block_size) in enumerate(stage_specs):
        use_ab = stage_idx < 2
        for unit_idx in range(num_units):
            l = inception(
                'inc{}/{}'.format(stage_idx, unit_idx),
                l,
                channels,
                2 if unit_idx == 0 else 1,  # downsample on the first block of a stage
                t=mult,
                swap_block=(unit_idx % 2 == 1),
                use_ab=use_ab)
        l = DropBlock(
            'inc{}/drop'.format(stage_idx), l,
            keep_prob=dropblock_keep_prob, block_size=block_size)

    l = Conv2D('convf', l, 96 * 6, 1, activation=None)
    l = BatchNorm('convf/bn', l)
    l = tf.nn.relu(l)
    l = GlobalAvgPooling('poolf', l)

    fc = FullyConnected('fc', l, 1280, activation=BNReLU)
    fc = Dropout(fc, keep_prob=0.9)
    logits = FullyConnected('linear', fc, num_classes, use_bias=True)
    return logits
def vgg_gap(image, option, importance=False):
    """Build a VGG-style network with a global-average-pooling head.

    An ``ADL`` layer is inserted after every conv and pooling layer whose
    index is enabled in ``option.attdrop``.

    Fixes relative to the previous revision:
      * position 22 now calls ``ADL(22, ...)`` instead of repeating index 21;
      * the ADL layer at position 6 is applied to ``convmaps`` (previously its
        result was stored in a variable that was never read again, so
        enabling position 6 had no effect on the outputs);
      * the unused tower-context locals were removed.

    Args:
        image: input tensor.
        option: configuration exposing ``attdrop`` (indexable by position),
            ``classnum``, and whatever ``convnormrelu`` / ``ADL`` read.
        importance: accepted for interface compatibility; not used here.

    Returns:
        ``(logits, convmaps)``: the output of the 'linear' layer and the
        feature maps of the last ('new') convolution.
    """
    def maybe_adl(index, tensor):
        # Insert an ADL layer only where the configuration enables it.
        if option.attdrop[index]:
            return ADL(index, tensor, option)
        return tensor

    # (pool name, ADL index after the pool, [(conv name, channels, ADL index), ...])
    # The last stage has no pooling layer.
    stages = [
        ('pool1', 1, [('conv1_1', 64, 11), ('conv1_2', 64, 12)]),
        ('pool2', 2, [('conv2_1', 128, 21), ('conv2_2', 128, 22)]),
        ('pool3', 3, [('conv3_1', 256, 31), ('conv3_2', 256, 32), ('conv3_3', 256, 33)]),
        ('pool4', 4, [('conv4_1', 512, 41), ('conv4_2', 512, 42), ('conv4_3', 512, 43)]),
        (None, None, [('conv5_1', 512, 51), ('conv5_2', 512, 52), ('conv5_3', 512, 53)]),
    ]

    with argscope(Conv2D,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = image
        for pool_name, pool_adl_index, convs in stages:
            for conv_name, channels, adl_index in convs:
                l = convnormrelu(l, conv_name, channels, option)
                l = maybe_adl(adl_index, l)
            if pool_name is not None:
                l = MaxPooling(pool_name, l, 2)
                l = maybe_adl(pool_adl_index, l)

        convmaps = convnormrelu(l, 'new', 1024, option)
        convmaps = maybe_adl(6, convmaps)

        pre_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear',
            pre_logits,
            option.classnum,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    return logits, convmaps
def resnet_conv5(image):
    """Apply the 'group3' bottleneck group and global average pooling to `image`.

    Layers are built in NCHW layout, with bias-free identity-activated
    convolutions and batch norm configured with ``use_local_stat=False``.
    """
    nchw_scope = argscope([Conv2D, GlobalAvgPooling, BatchNorm], data_format='NCHW')
    conv_scope = argscope(Conv2D, nl=tf.identity, use_bias=False)
    bn_scope = argscope(BatchNorm, use_local_stat=False)

    with nchw_scope, conv_scope, bn_scope:
        # 14x14:
        features = resnet_group(image, 'group3', resnet_bottleneck, 512, 3, stride=2)
        features = GlobalAvgPooling('gap', features)
    return features
def roi_heads(self, image, features, proposals, targets):
    """Build the Fast R-CNN head (and, if enabled, the mask head) on C4 features.

    Args:
        image: input image tensor; its dimensions from index 2 on are used
            as the 2-D image shape for box clipping.
        features: sequence whose first element is the feature map to crop from.
        proposals: proposal object exposing ``boxes``; replaced by sampled
            targets while training.
        targets: sequence ``(gt_boxes, gt_labels, ...)``; ``targets[2]`` is
            read as the ground-truth masks when ``cfg.MODE_MASK`` is set.

    Returns:
        In training, the list of head losses (with the mask loss appended
        when ``cfg.MODE_MASK`` is set). In inference, an empty list; the
        predictions are exposed through the named output tensors.
    """
    image_shape2d = tf.shape(image)[2:]     # h,w
    featuremap = features[0]

    gt_boxes, gt_labels, *_ = targets

    if self.training:
        # sample proposal boxes in training
        proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels)
    # The boxes to be used to crop RoIs.
    # Use all proposal boxes in inference

    # Scale box coordinates from image space to feature-map space.
    boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])    # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs('fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY)
    fastrcnn_head = FastRCNNHead(proposals, fastrcnn_box_logits, fastrcnn_label_logits, gt_boxes,
                                 tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if self.training:
        all_losses = fastrcnn_head.losses()

        if cfg.MODE_MASK:
            gt_masks = targets[2]
            # maskrcnn loss
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)   # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg))
        return all_losses
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Re-crop features for the final detections and run the mask head on them.
            roi_resized = roi_align(featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
            # Select, per detection, the mask at index (label - 1).
            indices = tf.stack([tf.range(tf.size(final_labels)), tf.cast(final_labels, tf.int32) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            # The op is created only for its name; the return value is not needed here.
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []
def vgg_gap(image, option):
    """Build a VGG-style network with a global-average-pooling head.

    ``gating_op`` is inserted after every conv and pooling layer whose index
    is enabled in ``option.gating_position``.

    Fix relative to the previous revision: at position 6 the gating op is now
    applied to ``convmaps`` (the output of the 'new' convolution). Previously
    it was applied to the tensor feeding that convolution, which replaced
    ``convmaps`` and discarded the 'new' layer's output whenever position 6
    was enabled.

    Args:
        image: input tensor.
        option: configuration exposing ``gating_position`` (indexable by
            position), ``number_of_class``, and whatever ``gating_op`` reads.

    Returns:
        ``(logits, convmaps)``: the output of the 'linear' layer and the
        feature maps of the last ('new') convolution.
    """
    def maybe_gate(index, tensor):
        # Insert the gating op only where the configuration enables it.
        if option.gating_position[index]:
            return gating_op(tensor, option)
        return tensor

    # (pool name, gating index after the pool, [(conv name, channels, gating index), ...])
    # The last stage has no pooling layer.
    stages = [
        ('pool1', 1, [('conv1_1', 64, 11), ('conv1_2', 64, 12)]),
        ('pool2', 2, [('conv2_1', 128, 21), ('conv2_2', 128, 22)]),
        ('pool3', 3, [('conv3_1', 256, 31), ('conv3_2', 256, 32), ('conv3_3', 256, 33)]),
        ('pool4', 4, [('conv4_1', 512, 41), ('conv4_2', 512, 42), ('conv4_3', 512, 43)]),
        (None, None, [('conv5_1', 512, 51), ('conv5_2', 512, 52), ('conv5_3', 512, 53)]),
    ]

    with argscope(Conv2D, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = image
        for pool_name, pool_gate_index, convs in stages:
            for conv_name, channels, gate_index in convs:
                l = convnormrelu(l, conv_name, channels)
                l = maybe_gate(gate_index, l)
            if pool_name is not None:
                l = MaxPooling(pool_name, l, 2)
                l = maybe_gate(pool_gate_index, l)

        convmaps = convnormrelu(l, 'new', 1024)
        convmaps = maybe_gate(6, convmaps)

        p_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear',
            p_logits,
            option.number_of_class,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    return logits, convmaps
def AccuracyBoost(x):
    '''
    Accuracy boost block for bottleneck layers.

    Scales `x` by a sigmoid gate computed from its globally pooled value,
    a learned per-channel weight 'W' and a batch-norm layer.
    '''
    num_channels = x.get_shape().as_list()[-1]

    pooled = GlobalAvgPooling('gpool', x)
    channel_weight = tf.get_variable(
        'W',
        shape=(num_channels,),
        initializer=tf.variance_scaling_initializer(2.0))
    weighted = tf.multiply(pooled, channel_weight)
    normalized = BatchNorm('bn', weighted)

    # Reshape the gate so the last axis carries the channels.
    gate = tf.nn.sigmoid(normalized)
    gate = tf.reshape(gate, (-1, 1, 1, num_channels))
    return tf.multiply(x, gate, name='res')
def AccuracyBoost(x):
    '''
    Accuracy boost block for bottleneck layers.

    Scales `x` by the sum of two sigmoid gates: one from the batch-normalised
    pooled value and one from the batch-normalised negated pooled value.
    Both batch-norm layers are built with ``training=False``.
    '''
    num_channels = x.get_shape().as_list()[-1]

    pooled = GlobalAvgPooling('gpool', x)
    pooled = tf.reshape(pooled, [-1, 1, 1, num_channels])

    positive_bn = BatchNorm('p/bn', pooled, training=False)
    positive_gate = tf.nn.sigmoid(positive_bn)
    negative_bn = BatchNorm('n/bn', -pooled, training=False)
    negative_gate = tf.nn.sigmoid(negative_bn)

    return tf.multiply(x, positive_gate + negative_gate, name='res')
def se_resnet_bottleneck(l, ch_out, stride):
    """Squeeze-and-excitation bottleneck block.

    The excitation vector is reshaped to ``[-1, ch_out * 4, 1, 1]`` before
    scaling, i.e. the channel count is placed on axis 1.

    Args:
        l: input tensor.
        ch_out: base channel count; the block outputs ``ch_out * 4`` channels.
        stride: stride of the 3x3 convolution, also forwarded to the shortcut.
    """
    block_input = l
    expanded = ch_out * 4

    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
    l = Conv2D('conv3', l, expanded, 1, nl=get_bn(zero_init=True))

    excite = GlobalAvgPooling('gap', l)
    excite = FullyConnected('fc1', excite, ch_out // 4, nl=tf.nn.relu)
    excite = FullyConnected('fc2', excite, expanded, nl=tf.nn.sigmoid)
    excite = tf.reshape(excite, [-1, expanded, 1, 1])

    l = l * excite
    projected = resnet_shortcut(block_input, expanded, stride, nl=get_bn(zero_init=False))
    return l + projected
def resnet(input_, DEPTH, option):
    """Build a ResNet of the given depth with optional ADL layers.

    Args:
        input_: input tensor.
        DEPTH: one of 18, 34, 50, 101, 152.
        option: configuration exposing ``mode`` ('resnet', 'preact' or 'se'),
            ``attdrop`` (indexable by position), ``laststride`` and
            ``classnum``; it is also forwarded to the group builder and ``ADL``.

    Returns:
        ``(logits, l)``: the output of the 'linearnew' layer and the feature
        maps that feed the global average pooling.
    """
    ctx = get_current_tower_context()
    is_training = ctx.is_training

    mode = option.mode
    is_preact = mode == 'preact'

    if is_preact:
        basicblock = preresnet_basicblock
    else:
        basicblock = resnet_basicblock
    bottleneck_by_mode = {
        'resnet': resnet_bottleneck,
        'preact': preresnet_bottleneck,
        'se': se_resnet_bottleneck,
    }
    bottleneck = bottleneck_by_mode[mode]

    # depth -> (blocks per group, block builder)
    depth_table = {
        18: ([2, 2, 2, 2], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck),
    }
    defs, block_func = depth_table[DEPTH]
    group_func = preresnet_group if is_preact else resnet_group

    def maybe_adl(index, tensor):
        # Insert an ADL layer only where the configuration enables it.
        if option.attdrop[index]:
            return ADL(index, tensor, option)
        return tensor

    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                     data_format='channels_first'):
        l = Conv2D('conv0', input_, 64, 7, strides=2, activation=BNReLU)  # 112
        l = maybe_adl(0, l)
        l = MaxPooling('pool0', l, 3, strides=2, padding='SAME')  # 56
        l = maybe_adl(1, l)

        # group0..group2 use fixed strides: 56 -> 56 -> 28 -> 14
        for idx, (width, stride) in enumerate([(64, 1), (128, 2), (256, 2)]):
            l = group_func('group{}'.format(idx), l, block_func, width, defs[idx], stride, option)
            l = maybe_adl(idx + 2, l)

        # The last group's stride comes from the configuration.
        l = group_func('group3', l, block_func, 512, defs[3], option.laststride, option)  # 7
        l = maybe_adl(5, l)

        prelogits = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linearnew', prelogits, option.classnum)

    return logits, l
def secondclassification_head(feature, num_classes):
    """
    Args:
        feature (NxCx7x7):
        num_classes(int): num_category + 1

    Returns:
        cls_logits (Nxnum_class)
    """
    pooled = GlobalAvgPooling('gap', feature, data_format='NCHW')
    cls_init = tf.random_normal_initializer(stddev=0.01)
    return FullyConnected('class', pooled, num_classes, W_init=cls_init)
def cls_head(feature):
    """Classification head: global pooling, a 1024-unit FC layer, dropout and
    a final FC layer with ``config.NUM_CLASS`` outputs.

    `feature` is pooled in NCHW layout.
    """
    pooled = GlobalAvgPooling('gap', feature, data_format='NCHW')

    hidden = FullyConnected(
        'fc1', pooled, 1024,
        W_init=tf.random_normal_initializer(stddev=0.01))
    hidden = Dropout(hidden)

    return FullyConnected(
        'fc2', hidden, config.NUM_CLASS,
        W_init=tf.random_normal_initializer(stddev=0.01))
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a ResNet from ``mpusim`` conv / FC layers and return 1000-way logits.

    Args:
        image: input tensor.
        num_blocks: number of blocks for each of the four groups.
        group_func: group builder, called as
            ``group_func(name, l, block_func, width, count, stride)``.
        block_func: block builder forwarded to ``group_func``.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(mpusim_conv2d, use_bias=False, kernel_initializer=conv_init):
        # Note that TF pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        l = mpusim_conv2d('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        for idx, (width, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], stride)
        l = GlobalAvgPooling('gap', l)
        logits = mpusim_fully_connected(
            'linear', l, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def se_bottleneck(l, ch_out, stride):
    """Squeeze-and-excitation bottleneck block built from ``mpusim`` layers.

    Args:
        l: input tensor.
        ch_out: base channel count; the block outputs ``ch_out * 4`` channels.
        stride: stride of the 3x3 convolution, also forwarded to the shortcut.

    Returns:
        ReLU of the SE-scaled residual branch plus the shortcut branch.
    """
    block_input = l
    expanded = ch_out * 4

    l = mpusim_conv2d('conv1', l, ch_out, 1, activation=BNReLU)
    l = mpusim_conv2d('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = mpusim_conv2d('conv3', l, expanded, 1, activation=get_bn(zero_init=True))

    excite = GlobalAvgPooling('gap', l)
    excite = mpusim_fully_connected('fc1', excite, ch_out // 4, activation=tf.nn.relu)
    excite = mpusim_fully_connected('fc2', excite, expanded, activation=tf.nn.sigmoid)

    # Put the channel count on the axis matching the conv layers' data format.
    data_format = get_arg_scope()['mpusim_conv2d']['data_format']
    if data_format in ('NCHW', 'channels_first'):
        excite_shape = [-1, expanded, 1, 1]
    else:
        excite_shape = [-1, 1, 1, expanded]
    l = l * tf.reshape(excite, excite_shape)

    projected = resnet_shortcut(
        block_input, expanded, stride, activation=get_bn(zero_init=False))
    return tf.nn.relu(l + projected)
def se_resnet_bottleneck(l, ch_out, stride):
    """Squeeze-and-excitation bottleneck block, optionally with grouped conv.

    When ``config.RESNET == 'ResXt'`` the first two convolutions use
    ``D * C`` channels (``C = 32`` groups, ``D`` derived from a base width
    of 4) and the 3x3 convolution is split into ``C`` groups; otherwise the
    plain bottleneck widths are used.

    Both branches now pass the stride through the ``strides=`` keyword.
    Previously the ResXt branch used the ``stride=`` spelling, while the
    other branch of this same function used ``strides=``.

    Args:
        l: input tensor.
        ch_out: base channel count; the block outputs ``ch_out * 4`` channels.
        stride: stride of the 3x3 convolution, also forwarded to the shortcut.

    Returns:
        ReLU of the SE-scaled residual branch plus the shortcut branch.

    Raises:
        KeyError: if no ``data_format`` is set for ``Conv2D`` in the current
            argscope (the lookup below requires it).
    """
    shortcut = l

    if config.RESNET == 'ResXt':
        baseWidth = 4
        D = int(math.floor(ch_out * (baseWidth / 64.0)))
        C = 32
        l = Conv2D('conv1', l, D * C, 1, activation=BNReLU)
        l = Conv2D('conv2', l, D * C, 3, strides=stride, split=C, activation=BNReLU)
        l = Conv2D('conv3', l, ch_out * 4, 1, strides=1, activation=get_bn(zero_init=True))
    else:
        l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
        l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
        l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))

    squeeze = GlobalAvgPooling('gap', l)
    squeeze = FullyConnected('fc1', squeeze, ch_out // 4, activation=tf.nn.relu)
    squeeze = FullyConnected('fc2', squeeze, ch_out * 4, activation=tf.nn.sigmoid)

    # Put the channel count on the axis matching the conv layers' data format.
    data_format = get_arg_scope()['Conv2D']['data_format']
    ch_ax = 1 if data_format in ('NCHW', 'channels_first') else 3
    shape = [-1, 1, 1, 1]
    shape[ch_ax] = ch_out * 4
    l = l * tf.reshape(squeeze, shape)

    out = l + resnet_shortcut(
        shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
    return tf.nn.relu(out)
def roi_heads(self, image, features, proposals, targets, training=None):
    """Build the Fast R-CNN head on C4 features.

    Args:
        image: input image tensor; its dimensions from index 2 on are used
            as the 2-D image shape for box clipping.
        features: sequence whose first element is the feature map to crop from.
        proposals: proposal object exposing ``boxes``; replaced by sampled
            targets while training.
        targets: sequence ``(gt_boxes, gt_labels, ...)``.
        training: overrides ``self.training`` when not ``None``.

    Returns:
        In training, the list of head losses. In inference, an empty list;
        the predictions are exposed through the named output tensors.
        (The inference path used to fall off the end and return ``None``;
        it now returns a list on both paths.)
    """
    if training is None:
        training = self.training

    image_shape2d = tf.shape(image)[2:]  # h,w
    featuremap = features[0]

    gt_boxes, gt_labels, *_ = targets

    if training:
        # sample proposal boxes in training
        proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels)
    # The boxes to be used to crop RoIs.
    # Use all proposal boxes in inference

    # Scale box coordinates from image space to feature-map space.
    boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(
        roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])  # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY)
    fastrcnn_head = FastRCNNHead(
        proposals, fastrcnn_box_logits, fastrcnn_label_logits, gt_boxes,
        tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if training:
        all_losses = fastrcnn_head.losses()
        return all_losses

    decoded_boxes = fastrcnn_head.decoded_output_boxes()
    decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
    label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
    # The ops are created for their names; the Python values are not used here.
    final_boxes, final_scores, final_labels = fastrcnn_predictions(
        decoded_boxes, label_scores, name_scope='output')
    return []
def fastrcnn_head(feature, num_classes):
    """
    Args:
        feature (NxCx7x7):
        num_classes(int): num_category + 1

    Returns:
        cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
    """
    num_fg_classes = num_classes - 1
    pooled = GlobalAvgPooling('gap', feature, data_format='NCHW')

    cls_logits = FullyConnected(
        'class', pooled, num_classes,
        W_init=tf.random_normal_initializer(stddev=0.01))

    # Four regression outputs per foreground class, reshaped to one row per class.
    box_logits = FullyConnected(
        'box', pooled, num_fg_classes * 4,
        W_init=tf.random_normal_initializer(stddev=0.001))
    box_logits = tf.reshape(box_logits, (-1, num_fg_classes, 4))

    return cls_logits, box_logits
def darknet(image, use_fp16):
    """Build a Darknet-style classifier and return 1000-way logits.

    After the stem convolution, each stage is a stride-2 3x3 convolution
    followed by a residual group.

    Args:
        image: input tensor.
        use_fp16: accepted for interface compatibility; not read by this function.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (channels, residual-group argument) per stage
    stage_specs = [(64, 1), (128, 2), (256, 8), (512, 8), (1024, 4)]

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        l = Conv2D('conv0', image, 32, 3, strides=1, activation=BNLeakyReLU)
        for idx, (channels, count) in enumerate(stage_specs):
            l = Conv2D('conv{}'.format(idx + 1), l, channels, 3, strides=2,
                       activation=BNLeakyReLU)
            l = resnet_group('group{}'.format(idx), l, channels, count)

        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear', l, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build an NCHW ResNet and return 1000-way logits.

    Args:
        image: input tensor.
        num_blocks: number of blocks for each of the four groups.
        group_func: group builder, called as
            ``group_func(name, l, block_func, width, count, stride)``.
        block_func: block builder forwarded to ``group_func``.
    """
    nchw_layers = [Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm]
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(nchw_layers, data_format='NCHW'), \
            argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        for idx, (width, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], stride)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear', l, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build an NCHW ResNet and return 1000-way logits.

    ImageNet in 1 Hour, Sec 5.1:
    The 1000-way fully-connected layer is initialized by
    drawing weights from a zero-mean Gaussian with standard deviation of 0.01
    """
    widths = (64, 128, 256, 512)
    first_strides = (1, 2, 2, 2)

    layout_scope = argscope(
        [Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
        data_format='NCHW')
    conv_scope = argscope(
        Conv2D, use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out'))

    with layout_scope, conv_scope:
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        for idx, (width, stride) in enumerate(zip(widths, first_strides)):
            l = group_func('group%d' % idx, l, block_func, width, num_blocks[idx], stride)
        l = GlobalAvgPooling('gap', l)

        fc_init = tf.random_normal_initializer(stddev=0.01)
        logits = FullyConnected('linear', l, 1000, kernel_initializer=fc_init)
    return logits
def img_level_cls_outputs(feature, num_classes):
    """
    Args:
        feature (any shape):
        num_classes(int): num_category + 1

    Returns:
        cls_logits: N x num_class classification logits
    """
    pooled = GlobalAvgPooling(
        'global_avg_pooling', feature, data_format='channels_first')
    cls_init = tf.random_normal_initializer(stddev=0.01)
    return FullyConnected(
        'img_level_class', pooled, num_classes, kernel_initializer=cls_init)
def resnet_backbone(image, num_blocks, group_func, block_func, activation_name='relu'):
    """Build an NCHW ResNet with explicit stem padding and return 1000-way logits.

    ImageNet in 1 Hour, Sec 5.1:
    The 1000-way fully-connected layer is initialized by
    drawing weights from a zero-mean Gaussian with standard deviation of 0.01

    Args:
        image: input tensor.
        num_blocks: number of blocks for each of the four groups.
        group_func: group builder, called as
            ``group_func(name, l, block_func, width, count, stride)``.
        block_func: block builder forwarded to ``group_func``.
        activation_name: forwarded to ``BNActivation`` for the stem convolution.
    """
    nchw_layers = [Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm]
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(nchw_layers, data_format='NCHW'), \
            argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        # this padding manner follows https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L358
        l = fixed_padding(image, 7)
        stem_activation = functools.partial(BNActivation, activation_name=activation_name)
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=stem_activation, padding='valid')
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        for idx, (width, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], stride)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear', l, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet_backbone_dropout(image, num_blocks, group_func, block_func):
    """Build a ResNet and return ``config.NUM_CLASS`` logits.

    When ``config.FREEZE`` is truthy, gradients are stopped at the output of
    'group0', so the layers up to and including that group receive no
    gradient from the loss.

    Args:
        image: input tensor.
        num_blocks: number of blocks for each of the four groups.
        group_func: group builder, called as
            ``group_func(name, l, block_func, width, count, stride)``.
        block_func: block builder forwarded to ``group_func``.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        # Note that this pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')

        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        if config.FREEZE:
            l = tf.stop_gradient(l)

        # Remaining groups: (index, base width), each with stride 2.
        for idx, width in ((1, 128), (2, 256), (3, 512)):
            l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], 2)

        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear_1', l, config.NUM_CLASS,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet_backbone_dropout(image, num_blocks, group_func, block_func, num_class=config.NUM_CLASS):
    """Build a ResNet with an optional dropout head and return ``num_class`` logits.

    The convolutional trunk is built inside
    ``backbone_scope(freeze=config.FREEZE, ...)``; the head is built inside
    ``backbone_scope(freeze=False, ...)``. When ``config.DROPOUT`` is truthy,
    a 1024-unit ReLU FC layer and a dropout layer precede the final FC layer.

    Args:
        image: input tensor.
        num_blocks: number of blocks for each of the four groups.
        group_func: group builder, called as
            ``group_func(name, l, block_func, width, count, stride)``.
        block_func: block builder forwarded to ``group_func``.
        num_class: number of output units (default bound at definition time
            to ``config.NUM_CLASS``).
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        # Note that this pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        with backbone_scope(freeze=config.FREEZE, freeze_bn=bool(config.FREEZE_BN)):
            l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
            l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
            for idx, (width, stride) in enumerate(group_specs):
                l = group_func('group{}'.format(idx), l, block_func, width, num_blocks[idx], stride)
            l = GlobalAvgPooling('gap', l)

        with backbone_scope(freeze=False, freeze_bn=bool(config.FREEZE_BN)):
            if config.DROPOUT:
                l = FullyConnected(
                    'linear_0', l, 1024,
                    activation=tf.nn.relu,
                    kernel_initializer=tf.random_normal_initializer(stddev=0.01))
                l = Dropout(l)  # ratio to keep
            logits = FullyConnected(
                'linear_1', l, num_class,
                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
def resnet(input_, option):
    """Build an SE-ResNet with optional gating ops.

    Args:
        input_: input tensor.
        option: configuration exposing ``mode`` (only 'se' is supported),
            ``depth`` (only 50 is supported), ``gating_position`` (indexable
            by position) and ``number_of_class``; it is also forwarded to
            the group builder and ``gating_op``.

    Returns:
        ``(logits, l)``: the output of the 'linearnew' layer and the feature
        maps that feed the global average pooling.

    Raises:
        KeyError: for any other ``option.mode`` or ``option.depth``.
    """
    mode = option.mode
    DEPTH = option.depth

    bottleneck = {'se': se_resnet_bottleneck}[mode]
    depth_table = {
        50: ([3, 4, 6, 3], bottleneck),
    }
    defs, block_func = depth_table[DEPTH]
    group_func = resnet_group

    def maybe_gate(index, tensor):
        # Insert the gating op only where the configuration enables it.
        if option.gating_position[index]:
            return gating_op(tensor, option)
        return tensor

    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    # (base width, stride of the first block) for group0..group3
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 1)]

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                     data_format='channels_first'):
        l = Conv2D('conv0', input_, 64, 7, strides=2, activation=BNReLU)
        l = maybe_gate(0, l)
        l = MaxPooling('pool0', l, 3, strides=2, padding='SAME')
        l = maybe_gate(1, l)

        for idx, (width, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func, width, defs[idx], stride, option)
            l = maybe_gate(idx + 2, l)

        p_logits = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linearnew', p_logits, option.number_of_class)

    return logits, l