def fastrcnn_outputs(feature, num_classes, class_agnostic_regression=False):
    """
    Classification and box-regression heads for Fast R-CNN.

    Args:
        feature (any shape): input feature for both FC heads
        num_classes (int): num_category + 1
        class_agnostic_regression (bool): if True, regress a single box per
            RoI (N x 1 x 4) instead of one box per class.

    Returns:
        cls_logits: N x num_classes classification logits
        reg_logits: N x num_classes x 4, or N x 1 x 4 if class agnostic
    """
    cls_logits = FullyConnected(
        'class', feature, num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    # One set of 4 box deltas per class, or a single shared set.
    box_dim = 1 if class_agnostic_regression else num_classes
    reg_logits = FullyConnected(
        'box', feature, box_dim * 4,
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    reg_logits = tf.reshape(reg_logits, (-1, box_dim, 4), name='output_box')
    return cls_logits, reg_logits
def resnet_backbone(images, num_blocks, grp_fun, blck_fun, nfeatures, bn=True):
    """
    Two-stream ResNet-style backbone producing an `nfeatures`-dim embedding.

    Each of the two input streams gets its own stride-2 7x7 stem conv; the two
    stem outputs are concatenated channel-wise before the residual groups.

    Args:
        images: sequence of two input tensors (images[0], images[1]).
        num_blocks: number of residual blocks per group (4 entries).
        grp_fun: group builder function.
        blck_fun: block builder function passed through to `grp_fun`.
        nfeatures (int): size of the final 'linear' output.
        bn (bool): use BNReLU (True) or plain ReLU (False) in the stem convs.

    Returns:
        N x nfeatures embedding tensor.
    """
    # from tf.contrib.layers import variance_scaling_initializer
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        first_input = images[0]
        second_input = images[1]
        act = BNReLU if bn else tf.nn.relu
        x = Conv2D('conv0', first_input, 64, 7, stride=2, nl=act)
        y = Conv2D('conv1', second_input, 64, 7, stride=2, nl=act)
        # stack second_input into channel-dimension of conv0 output
        x = tf.concat([x, y], axis=3, name='stack_second_input')
        x = MaxPooling('pool0', x, shape=3, stride=2, padding='SAME')
        x = grp_fun(x, 'group0', blck_fun, 64, num_blocks[0], 1, bn=bn)
        x = grp_fun(x, 'group1', blck_fun, 128, num_blocks[1], 2, bn=bn)
        x = grp_fun(x, 'group2', blck_fun, 256, num_blocks[2], 2, bn=bn)
        # NOTE(review): group3 keeps 256 channels and stride 1, unlike the
        # usual 512/stride-2 last ResNet stage — confirm this is intentional.
        x = grp_fun(x, 'group3', blck_fun, 256, num_blocks[3], 1, bn=bn)
        x = GlobalAvgPooling('gap', x)
        x = FullyConnected('fc0', x, 1000)  # NOTE linear activation intended?
        x = FullyConnected('fc1', x, 500)  # NOTE linear activation intended?
        x = FullyConnected(
            'linear', x, nfeatures, nl=tf.identity
        )  # NOTE looks as if Fabi assumed a non-linear activation as default
        # NOTE the last 3 FC layers are linearly activated (see graph in TB),
        # i.e. a single FC layer should suffice (possibly better runtime)
        return x
def fastrcnn_outputs_cascade(feature, num_classes, stage_num):
    """
    Per-stage classification and box-regression heads for Cascade R-CNN.

    Args:
        feature (any shape): input feature for both FC heads
        num_classes (int): num_category + 1
        stage_num (int): cascade stage (1, 2 or 3); selects the name suffix

    Returns:
        cls_logits (N x num_classes), reg_logits (N x num_classes x 4)
    """
    # Layer-name suffix per cascade stage; anything else gets no suffix.
    prefix = {1: '_1st', 2: '_2nd', 3: '_3rd'}.get(stage_num, '')
    cls_logits = FullyConnected(
        'class' + prefix, feature, num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    reg_logits = FullyConnected(
        'box' + prefix, feature, num_classes * 4,
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    reg_logits = tf.reshape(reg_logits, (-1, num_classes, 4))
    return cls_logits, reg_logits
def _get_logits(self, image, pose):
    """
    Two-stream (image + pose) network producing 2-way logits.

    The image stream runs through two conv/pool groups into 'fc3'; the pose
    stream through 'pc1'. Both are projected to 1024 dims, summed, passed
    through ReLU, and reduced to 2 logits.

    Args:
        image: input image tensor.
        pose: pose feature vector.

    Returns:
        N x 2 logits tensor.
    """
    with argscope([Conv2D, MaxPooling], data_format=self.data_format,
                  padding='same'), \
            argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
            argscope([Conv2D], strides=1):
        im_fc3 = (
            LinearWrap(image)
            # 1_1
            .Conv2D('conv1_1', 64, 7)
            .MaxPooling('pool1_1', 1)
            # 1_2
            .Conv2D('conv1_2', 64, 5, activation=LocalNorm)
            # .LocalNorm('conv1_2_norm', cfg.radius, cfg.alpha, cfg.beta, cfg.bias)
            .MaxPooling('pool1_2', 2)
            # 2_1
            .Conv2D('conv2_1', 64, 3)
            .MaxPooling('pool2_1', 1)
            # 2_2
            .Conv2D('conv2_2', 64, 3, activation=LocalNorm)
            # .LocalNorm('conv2_2_norm', cfg.radius, cfg.alpha, cfg.beta, cfg.bias)
            .MaxPooling('pool2_2', 1)
            .FullyConnected('fc3', 1024)())
        if cfg.drop_fc3:
            # BUG FIX: previously referenced the undefined name `fc3`, which
            # raised NameError whenever cfg.drop_fc3 was enabled.
            im_fc3 = tf.nn.dropout(im_fc3, cfg.fc3_drop_rate)
        pc1 = FullyConnected('pc1', pose, 16)
        # Linear projections of each stream; non-linearity applied after the sum.
        fc4_im = FullyConnected('fc4_im', im_fc3, 1024, activation=tf.identity)
        fc4_pose = FullyConnected('fc4_pose', pc1, 1024, activation=tf.identity)
        fc4 = tf.nn.relu(fc4_im + fc4_pose)
        fc5 = FullyConnected('fc5', fc4, 2)
    return fc5
def fastrcnn_outputs(feature, num_classes):
    """
    Fast R-CNN heads: class logits plus per-class box and mask regressions.

    Args:
        feature (any shape): shared input feature
        num_classes (int): num_category + 1

    Returns:
        cls_logits (N x num_classes),
        reg_logits (N x num_classes x 4),
        mask_logits (N x num_classes x 5)
    """
    cls_logits = FullyConnected(
        'class', feature, num_classes,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    boxes = FullyConnected(
        'box', feature, num_classes * 4,
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    boxes = tf.reshape(boxes, (-1, num_classes, 4), name='output_box')
    # Five mask parameters regressed per class.
    masks = FullyConnected(
        'mask', feature, num_classes * 5,
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    masks = tf.reshape(masks, (-1, num_classes, 5), name='output_mask')
    return cls_logits, boxes, masks
def boxclass_outputs(feature, num_classes, seed_gen, class_agnostic_regression=False):
    """
    Classification and box-regression heads with seeded initializers.

    Args:
        feature: features generated from FasterRCNN head function,
            Num_boxes x Num_features
        num_classes (int): num_category + 1
        seed_gen: generator whose `.next()` supplies initializer seeds
        class_agnostic_regression (bool): if True, regression to
            Num_boxes x 1 x 4

    Returns:
        cls_logits: Num_boxes x Num_classes classification logits
        reg_logits: Num_boxes x num_classes x 4, or Num_boxes x 1 x 4 if
            class agnostic
    """
    cls_logits = FullyConnected(
        'class', feature, num_classes,
        kernel_initializer=tf.random_normal_initializer(
            stddev=0.01, seed=seed_gen.next()))
    box_dim = 1 if class_agnostic_regression else num_classes
    reg_logits = FullyConnected(
        'box', feature, box_dim * 4,
        kernel_initializer=tf.random_normal_initializer(
            stddev=0.001, seed=seed_gen.next()))
    reg_logits = tf.reshape(reg_logits, [-1, box_dim, 4], name='output_box')
    return cls_logits, reg_logits
def se_resnet_bottleneck(l, ch_out, stride, rate):
    """
    Dilated SE-ResNet bottleneck block (old tensorpack `nl=` API).

    Args:
        l: input feature map.
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the 3x3 conv.
        rate (int): dilation rate for all three convs (also forwarded to
            the shortcut).

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, dilation_rate=rate, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, stride=stride, dilation_rate=rate, nl=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, dilation_rate=rate, nl=get_bn(zero_init=True))
    # Squeeze-and-excitation: global pool -> bottleneck FC -> sigmoid gate.
    gate = GlobalAvgPooling('gap', l)
    gate = FullyConnected('fc1', gate, ch_out // 4, nl=tf.nn.relu)
    gate = FullyConnected('fc2', gate, ch_out * 4, nl=tf.nn.sigmoid)
    # Broadcast the per-channel gate along the spatial axes.
    data_format = get_arg_scope()['Conv2D']['data_format']
    gate_shape = [-1, 1, 1, 1]
    gate_shape[1 if data_format == 'NCHW' else 3] = ch_out * 4
    l = l * tf.reshape(gate, gate_shape)
    return l + resnet_shortcut(
        shortcut, ch_out * 4, stride, rate, nl=get_bn(zero_init=False))
def boxclass_2fc_head(feature, seed_gen, fp16=False):
    """
    Two fully connected layers shared by the class and box branches.

    Args:
        feature: RoI feature map, Num_boxes x Num_channels x H_roi x W_roi
        seed_gen: generator whose `.next()` supplies the initializer seed
        fp16 (bool): run the head in float16 (casts the input in and the
            output back to float32)

    Returns:
        2D head feature: Num_boxes x cfg.FPN.BOXCLASS_FC_HEAD_DIM
    """
    dim = cfg.FPN.BOXCLASS_FC_HEAD_DIM
    if fp16:
        feature = tf.cast(feature, tf.float16)
    with mixed_precision_scope(mixed=fp16):
        w_init = tf.variance_scaling_initializer(
            dtype=tf.float16 if fp16 else tf.float32, seed=seed_gen.next())
        out = FullyConnected('fc6', feature, dim,
                             kernel_initializer=w_init, activation=tf.nn.relu)
        out = FullyConnected('fc7', out, dim,
                             kernel_initializer=w_init, activation=tf.nn.relu)
    if fp16:
        out = tf.cast(out, tf.float32)
    return out
def resnet_bottleneck(l, ch_out, stride, stride_first=False):
    """
    Bottleneck residual block augmented with a gated global-context branch.

    The (linear) 3x3 conv output is fused with a tiled global-average feature
    through a learned sigmoid gate before the final 1x1 expansion conv.

    Args:
        l: input feature map (NCHW layout assumed by the reshape/tile below —
            TODO confirm against the surrounding argscope).
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the block.
        stride_first (bool): original resnet put stride on first conv.
            fb.resnet.torch put stride on second conv.

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, strides=stride if stride_first else 1, activation=BNReLU)
    # 3x3 conv kept linear; BN+ReLU is applied after the gated fusion below.
    l_3x3 = Conv2D('conv2', l, ch_out, 3, strides=1 if stride_first else stride, activation=tf.identity)
    shape = l_3x3.get_shape().as_list()
    # Global-context branch: GAP -> FC -> FC, tiled back to the spatial size.
    l_gap = GlobalAvgPooling('gap', l)
    l_gap = FullyConnected('fc1', l_gap, ch_out, activation=tf.nn.relu)
    l_gap = FullyConnected('fc2', l_gap, ch_out, activation=tf.identity)
    l_gap = tf.reshape(l_gap, [-1, ch_out, 1, 1])
    l_gap = tf.tile(l_gap, [1, 1, shape[2], shape[3]])
    # Sigmoid gate computed from the concatenated local + global features.
    l_concat = tf.concat([l_3x3, l_gap], axis = 1)
    l_concat = Conv2D('conv_c1', l_concat, ch_out, 1, strides=1, activation=tf.nn.relu)
    l_concat = Conv2D('conv_c2', l_concat, ch_out, 1, strides=1, activation=tf.identity)
    l_concat = tf.sigmoid(l_concat)
    # Fuse: local features plus gated global context.
    l = l_3x3 + l_gap * l_concat
    # NOTE(review): the layer name 'conv2' is reused here (the Conv2D above
    # also uses 'conv2') — confirm the BNReLU variables do not collide with
    # that conv's variable scope.
    l = BNReLU('conv2',l)
    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
def fastrcnn_outputs(feature, num_classes, class_agnostic_regression=False): """ Args: feature (any shape): num_classes(int): num_category + 1 class_agnostic_regression (bool): if True, regression to N x 1 x 4 Returns: cls_logits: N x num_class classification logits 2-D reg_logits: N x num_class x 4 or Nx2x4 if class agnostic 3-D """ # cls with varreplace.freeze_variables(stop_gradient=False, skip_collection=True): classification = FullyConnected( 'class', feature, num_classes, kernel_initializer=tf.random_normal_initializer(stddev=0.01)) num_classes_for_box = 1 if class_agnostic_regression else num_classes # reg box_regression = FullyConnected( 'box', feature, num_classes_for_box * 4, kernel_initializer=tf.random_normal_initializer(stddev=0.001)) box_regression = tf.reshape(box_regression, (-1, num_classes_for_box, 4), name='output_box') return classification, box_regression
def se_resnet_bottleneck(option, l, ch_out, stride, adl_index=None):
    """
    SE-ResNet bottleneck with post-addition ReLU and an optional gating op.

    Args:
        option: config object; `option.gating_position[adl_index]` decides
            whether `gating_op` is applied to the block output.
        l: input feature map.
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the 3x3 conv.
        adl_index: key into `option.gating_position`.

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
    # Squeeze-and-excitation gate.
    se = GlobalAvgPooling('gap', l)
    se = FullyConnected('fc1', se, ch_out // 4, activation=tf.nn.relu)
    se = FullyConnected('fc2', se, ch_out * 4, activation=tf.nn.sigmoid)
    # Broadcast the per-channel gate along the spatial axes.
    bcast = [-1, 1, 1, 1]
    bcast[1 if is_data_format_nchw() else 3] = ch_out * 4
    l = l * tf.reshape(se, bcast)
    out = tf.nn.relu(
        l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn()))
    if option.gating_position[adl_index]:
        out = gating_op(out, option)
    return out
def network_architecture(self, *args):
    """
    Encoder network predicting spherical-gaussian (SG) environment parameters.

    Args:
        *args: (cam1, cam2, mask, normal, depth) input tensors; inputs are
            concatenated along the channel axis (channels_last).

    Returns:
        'sgs' tensor of shape [-1, self.num_sgs, 3], scaled by MAX_VAL.
    """
    cam1, cam2, mask, normal, depth = args
    with tf.variable_scope("env_net"):
        # Number of stride-2 encoder convs needed to reduce the input to 4x4.
        layers_needed = int(log2(cam1.shape[1].value) - 2)
        batch_size = tf.shape(cam1)[0]  # NOTE(review): currently unused
        with argscope([Conv2D, Conv2DTranspose, BatchNorm],
                      data_format="channels_last"):
            # Only the first mask channel is used.
            net = tf.concat([cam1, cam2, mask[:, :, :, 0:1], normal, depth],
                            axis=-1)
            with tf.variable_scope("enc"):
                for idx in range(layers_needed):
                    net = Conv2D(
                        "conv%d" % (idx + 1),
                        net,
                        min(self.base_nf * (2 ** idx), 256),
                        4,
                        strides=2,
                        activation=INReLU,
                    )
            encoded = tf.identity(net, "encoded")
            with tf.variable_scope("env_map"):
                sgs = Conv2D("conv1", encoded, 256, 3, strides=2,
                             activation=tf.nn.relu)
                sgs = Conv2D("conv2", sgs, 512, 3, strides=2,
                             activation=tf.nn.relu)
                sgs = tf.layers.Flatten()(sgs)
                sgs = FullyConnected("fc1", sgs, 256, activation=tf.nn.relu)
                # Dropout argument here is the keep probability.
                sgs = Dropout("drop", sgs, 0.75)
                sgs = FullyConnected("fc2", sgs, self.num_sgs * 3,
                                     activation=tf.nn.sigmoid)
        with tf.variable_scope("predictions"):
            sgs = tf.identity(
                tf.reshape(sgs * MAX_VAL, [-1, self.num_sgs, 3]),
                name="sgs",
            )
    return sgs
def se_resnet_bottleneck(l, ch_out, stride):
    """
    SE-ResNet bottleneck (old tensorpack `nl=` API).

    The gate reshape `[-1, C, 1, 1]` assumes NCHW layout.

    Args:
        l: input feature map.
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the 3x3 conv.

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, nl=get_bn(zero_init=True))
    # Squeeze-and-excitation gate.
    se = GlobalAvgPooling('gap', l)
    se = FullyConnected('fc1', se, ch_out // 4, nl=tf.nn.relu)
    se = FullyConnected('fc2', se, ch_out * 4, nl=tf.nn.sigmoid)
    gated = l * tf.reshape(se, [-1, ch_out * 4, 1, 1])
    residual = resnet_shortcut(shortcut, ch_out * 4, stride,
                               nl=get_bn(zero_init=False))
    return gated + residual
def attr_output(name, feature):
    """
    Binary attribute head: a 512-wide ReLU hidden FC, then a 2-way logit layer.

    Args:
        name (str): attribute name, used for the layer names.
        feature: input feature.

    Returns:
        N x 2 attribute logits.
    """
    w_init = tf.random_normal_initializer(stddev=0.01)
    hidden = FullyConnected(
        '{}_hidden'.format(name), feature, 512,
        activation=tf.nn.relu,
        kernel_initializer=w_init)
    return FullyConnected(name, hidden, 2, kernel_initializer=w_init)
def fastrcnn_2fc_head(feature):
    """
    Two-FC Fast R-CNN head.

    Args:
        feature (any shape): RoI feature.

    Returns:
        2D head feature of size cfg.FPN.FRCNN_FC_HEAD_DIM.
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    w_init = tf.variance_scaling_initializer()
    fc6 = FullyConnected('fc6', feature, dim,
                         kernel_initializer=w_init, activation=tf.nn.relu)
    fc7 = FullyConnected('fc7', fc6, dim,
                         kernel_initializer=w_init, activation=tf.nn.relu)
    return fc7
def cls_head(feature):
    """
    Classification head: GAP -> FC(1024) -> dropout -> FC(NUM_CLASS).

    Args:
        feature: NCHW feature map.

    Returns:
        N x config.NUM_CLASS logits.
    """
    pooled = GlobalAvgPooling('gap', feature, data_format='NCHW')
    w_init = tf.random_normal_initializer(stddev=0.01)
    fc1 = FullyConnected('fc1', pooled, 1024, W_init=w_init)
    fc1 = Dropout(fc1)
    return FullyConnected('fc2', fc1, config.NUM_CLASS, W_init=w_init)
def fastrcnn_2fc_head(feature, num_classes):
    """
    Two-FC Fast R-CNN head feeding `fastrcnn_outputs` (old `nl=` API).

    Args:
        feature (any shape): RoI feature.
        num_classes (int): num_category + 1.

    Returns:
        outputs of `fastrcnn_outputs('outputs', ...)`.
    """
    dim = config.FASTRCNN_FC_HEAD_DIM
    w_init = tf.variance_scaling_initializer()
    x = FullyConnected('fc6', feature, dim, kernel_initializer=w_init, nl=tf.nn.relu)
    x = FullyConnected('fc7', x, dim, kernel_initializer=w_init, nl=tf.nn.relu)
    return fastrcnn_outputs('outputs', x, num_classes)
def fastrcnn_2fc_head(feature, num_classes):
    """
    Two-FC Fast R-CNN head feeding `fastrcnn_outputs`.

    Args:
        feature (any shape): RoI feature.
        num_classes (int): num_category + 1.

    Returns:
        outputs of `fastrcnn_outputs()`.
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    w_init = tf.variance_scaling_initializer()
    x = FullyConnected('fc6', feature, dim,
                       kernel_initializer=w_init, activation=tf.nn.relu)
    x = FullyConnected('fc7', x, dim,
                       kernel_initializer=w_init, activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', x, num_classes)
def fastrcnn_2fc_head_(scope_str, feature, L2_reg, training):
    """
    Args:
        feature (any shape):

    Returns:
        2D head feature
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tf.variance_scaling_initializer()
    import tensorflow.contrib.slim as slim
    from net.faster_rcnn import fasterrcnn_arg_scope
    arg_scope = fasterrcnn_arg_scope(weight_decay=L2_reg)
    with slim.arg_scope(arg_scope):
        with slim.arg_scope([slim.batch_norm], is_training=training):
            with tf.variable_scope(scope_str):
                # NOTE(review): the two slim layers below are immediately
                # overwritten by the tensorpack FullyConnected call, so the
                # slim fc6/fc7 outputs are discarded (their variables are
                # still created, and both define a scope named 'fc6').
                # Looks like work-in-progress — confirm which path is intended.
                hidden = slim.fully_connected(feature, dim,
                                              activation_fn=tf.nn.relu,
                                              scope='fc6')
                hidden = slim.fully_connected(hidden, dim,
                                              activation_fn=tf.nn.relu,
                                              scope='fc7')
                hidden = FullyConnected('fc6', feature, dim,
                                        kernel_initializer=init,
                                        activation=tf.nn.relu)
                # hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
    return hidden
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Conv-stack + single-FC Fast R-CNN head.

    Args:
        feature (NCHW): RoI feature map.
        num_convs (int): number of conv layers before the FC.
        norm (str or None): either None or 'GN' (GroupNorm after each conv).

    Returns:
        2D head feature of size cfg.FPN.FRCNN_FC_HEAD_DIM.
    """
    assert norm in [None, 'GN'], norm
    x = feature
    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=conv_init):
        for i in range(num_convs):
            x = Conv2D('conv{}'.format(i), x, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                       activation=tf.nn.relu)
            if norm is not None:
                x = GroupNorm('gn{}'.format(i), x)
    return FullyConnected(
        'fc', x, cfg.FPN.FRCNN_FC_HEAD_DIM,
        kernel_initializer=tf.variance_scaling_initializer(),
        activation=tf.nn.relu)
def se_resnet_bottleneck(l, ch_out, stride):
    """
    SE-ResNet bottleneck with ReLU applied after the residual addition.

    Args:
        l: input feature map.
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the 3x3 conv.

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
    # Squeeze-and-excitation gate.
    se = GlobalAvgPooling('gap', l)
    se = FullyConnected('fc1', se, ch_out // 4, activation=tf.nn.relu)
    se = FullyConnected('fc2', se, ch_out * 4, activation=tf.nn.sigmoid)
    # Broadcast the gate along the spatial axes, honoring the data format.
    fmt = get_arg_scope()['Conv2D']['data_format']
    bcast = [-1, 1, 1, 1]
    bcast[1 if fmt in ['NCHW', 'channels_first'] else 3] = ch_out * 4
    out = l * tf.reshape(se, bcast) + resnet_shortcut(
        shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
    return tf.nn.relu(out)
def get_logits(self, x):
    """
    ResNet-style forward pass: init block, staged residual units, global
    pooling, and a final FC over `self.classes`.

    Args:
        x: input image tensor (channels-first).

    Returns:
        N x self.classes logits.
    """
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
                  data_format='channels_first'):
        x = res_init_block(x=x,
                           in_channels=self.in_channels,
                           out_channels=self.init_block_channels,
                           name="features/init_block")
        in_channels = self.init_block_channels
        for stage_idx, stage_channels in enumerate(self.channels):
            for unit_idx, out_channels in enumerate(stage_channels):
                # Downsample at the first unit of every stage but the first.
                strides = 2 if unit_idx == 0 and stage_idx != 0 else 1
                x = res_unit(
                    "features/stage{}/unit{}".format(stage_idx + 1, unit_idx + 1),
                    x,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    strides=strides,
                    bottleneck=self.bottleneck,
                    conv1_stride=self.conv1_stride,
                    use_se=self.use_se)
                in_channels = out_channels
        x = GlobalAvgPooling("features/final_pool", x)
        return FullyConnected("output", x, units=self.classes)
def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
    """
    Conv-stack + single-FC Fast R-CNN head feeding `fastrcnn_outputs`.

    Args:
        feature (any shape): RoI feature (channels-first).
        num_classes (int): num_category + 1.
        num_convs (int): number of conv layers before the FC.

    Returns:
        outputs of `fastrcnn_outputs('outputs', ...)`.
    """
    x = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for i in range(num_convs):
            x = Conv2D('conv{}'.format(i), x, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                       activation=tf.nn.relu)
        x = FullyConnected(
            'fc', x, cfg.FPN.FRCNN_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', x, num_classes)
def resnet_backbone(image, num_blocks, group_func, block_func):
    """
    Standard ResNet backbone producing 1000-way classification logits.

    Sec 5.1: conv layers use the MSRA initialization of [15], approximated
    here with variance_scaling since TensorFlow has no true "MSRA init".
    The 1000-way FC is initialized from a zero-mean Gaussian, stddev 0.01.

    Args:
        image: input image tensor.
        num_blocks: number of residual blocks per group (4 entries).
        group_func: group builder function.
        block_func: block builder function passed through to `group_func`.

    Returns:
        N x 1000 logits.
    """
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out')):
        net = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        net = MaxPooling('pool0', net, pool_size=3, strides=2, padding='SAME')
        # Four residual stages; width doubles while spatial size halves.
        stages = [('group0', 64, num_blocks[0], 1),
                  ('group1', 128, num_blocks[1], 2),
                  ('group2', 256, num_blocks[2], 2),
                  ('group3', 512, num_blocks[3], 2)]
        for name, width, count, stride in stages:
            net = group_func(name, net, block_func, width, count, stride)
        net = GlobalAvgPooling('gap', net)
        return FullyConnected(
            'linear', net, 1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
def re_id_loss(pred_boxes, pred_matching_gt_ids, featuremap):
    """
    Re-identification head and its softmax classification loss.

    RoI-aligns the predicted boxes on the backbone feature map, runs them
    through the conv5 stage plus three FC layers, and classifies each RoI
    into one of cfg.DATA.NUM_ID identities.

    Args:
        pred_boxes: predicted boxes in image coordinates.
        pred_matching_gt_ids: ground-truth identity label per box.
        featuremap: backbone feature map (channels-first).

    Returns:
        (label_loss, num_of_samples_used): mean cross-entropy loss and a
        running counter of RoIs that passed through this head.
    """
    with tf.variable_scope('id_head'):
        # Non-trainable counter of how many RoIs this head has processed.
        num_of_samples_used = tf.get_variable(
            'num_of_samples_used', initializer=0, trainable=False)
        num_of_samples_used = num_of_samples_used.assign_add(
            tf.shape(pred_boxes)[0])
        # Image coordinates -> feature-map coordinates.
        boxes_on_featuremap = pred_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        # name scope? / stop gradient — open questions from the original.
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
        # n x c x 7 x 7 features after the conv5 residual stage.
        feat = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
        pooled = GlobalAvgPooling('gap', feat, data_format='channels_first')
        w_init = tf.variance_scaling_initializer()
        h = FullyConnected('fc6', pooled, 1024,
                           kernel_initializer=w_init, activation=tf.nn.relu)
        h = FullyConnected('fc7', h, 1024,
                           kernel_initializer=w_init, activation=tf.nn.relu)
        h = FullyConnected('fc8', h, 256,
                           kernel_initializer=w_init, activation=tf.nn.relu)
        id_logits = FullyConnected(
            'class', h, cfg.DATA.NUM_ID,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=pred_matching_gt_ids, logits=id_logits)
        label_loss = tf.reduce_mean(ce, name='label_loss')
    return label_loss, num_of_samples_used
def vgg_gap(image, option, importance=False):
    """
    VGG-16 backbone with a GAP classification head and optional ADL
    (attention-based dropout) inserted after selected layers.

    Args:
        image: input image tensor (channels-first).
        option: config object; `option.attdrop[idx]` toggles ADL after the
            layer with that index, `option.classnum` sets the logit count.
        importance (bool): unused here; kept for interface compatibility.

    Returns:
        (logits, convmaps): classification logits and the final 1024-channel
        conv feature map.

    Fixes vs. the previous version:
      * the `attdrop[22]` branch called `ADL(21, ...)` (copy-paste typo);
        it now uses index 22.
      * the `attdrop[6]` branch applied ADL to the stale pre-'new' tensor and
        discarded the result (a no-op); it now gates `convmaps`.
    """
    def maybe_adl(idx, t):
        # Apply ADL at position `idx` when enabled in the config.
        return ADL(idx, t, option) if option.attdrop[idx] else t

    with argscope(Conv2D, kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = maybe_adl(11, convnormrelu(image, 'conv1_1', 64, option))
        l = maybe_adl(12, convnormrelu(l, 'conv1_2', 64, option))
        l = maybe_adl(1, MaxPooling('pool1', l, 2))
        l = maybe_adl(21, convnormrelu(l, 'conv2_1', 128, option))
        l = maybe_adl(22, convnormrelu(l, 'conv2_2', 128, option))
        l = maybe_adl(2, MaxPooling('pool2', l, 2))
        l = maybe_adl(31, convnormrelu(l, 'conv3_1', 256, option))
        l = maybe_adl(32, convnormrelu(l, 'conv3_2', 256, option))
        l = maybe_adl(33, convnormrelu(l, 'conv3_3', 256, option))
        l = maybe_adl(3, MaxPooling('pool3', l, 2))
        l = maybe_adl(41, convnormrelu(l, 'conv4_1', 512, option))
        l = maybe_adl(42, convnormrelu(l, 'conv4_2', 512, option))
        l = maybe_adl(43, convnormrelu(l, 'conv4_3', 512, option))
        l = maybe_adl(4, MaxPooling('pool4', l, 2))
        l = maybe_adl(51, convnormrelu(l, 'conv5_1', 512, option))
        l = maybe_adl(52, convnormrelu(l, 'conv5_2', 512, option))
        l = maybe_adl(53, convnormrelu(l, 'conv5_3', 512, option))
        convmaps = maybe_adl(6, convnormrelu(l, 'new', 1024, option))
        pre_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear', pre_logits, option.classnum,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits, convmaps
def se_resnet_bottleneck(l, ch_out, stride):
    """
    SE-ResNet(/ResNeXt) bottleneck with post-addition ReLU.

    When config.RESNET == 'ResXt', the stack uses a grouped 3x3 conv with
    cardinality C=32 and base width 4.

    NOTE(review): the ResNeXt branch passes `stride=`/`split=` while the plain
    branch uses `strides=` — mixed tensorpack API spellings; confirm both are
    accepted by the tensorpack version in use.

    Args:
        l: input feature map.
        ch_out (int): bottleneck width; output has ch_out * 4 channels.
        stride (int): spatial stride of the 3x3 conv.

    Returns:
        output feature map with ch_out * 4 channels.
    """
    shortcut = l
    if config.RESNET == 'ResXt':
        baseWidth = 4
        D = int(math.floor(ch_out * (baseWidth / 64.0)))
        C = 32
        l = Conv2D('conv1', l, D * C, 1, activation=BNReLU)
        l = Conv2D('conv2', l, D * C, 3, stride=stride, split=C, activation=BNReLU)
        l = Conv2D('conv3', l, ch_out * 4, 1, stride=1,
                   activation=get_bn(zero_init=True))
    else:
        l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
        l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
        l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
    # Squeeze-and-excitation gate.
    se = GlobalAvgPooling('gap', l)
    se = FullyConnected('fc1', se, ch_out // 4, activation=tf.nn.relu)
    se = FullyConnected('fc2', se, ch_out * 4, activation=tf.nn.sigmoid)
    fmt = get_arg_scope()['Conv2D']['data_format']
    bcast = [-1, 1, 1, 1]
    bcast[1 if fmt in ['NCHW', 'channels_first'] else 3] = ch_out * 4
    out = l * tf.reshape(se, bcast) + resnet_shortcut(
        shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
    return tf.nn.relu(out)
def fastrcnn_head(feature, num_classes):
    """
    Fast R-CNN outputs under the 'fastrcnn' variable scope (old `W_init` API).

    Args:
        feature (NxCx1x1): pooled RoI feature.
        num_classes (int): num_category + 1.

    Returns:
        cls_logits (N x num_classes),
        reg_logits (N x (num_classes - 1) x 4) — no box regression for the
        background class.
    """
    with tf.variable_scope('fastrcnn'):
        cls_logits = FullyConnected(
            'class', feature, num_classes,
            W_init=tf.random_normal_initializer(stddev=0.01))
        reg_logits = FullyConnected(
            'box', feature, (num_classes - 1) * 4,
            W_init=tf.random_normal_initializer(stddev=0.001))
        reg_logits = tf.reshape(reg_logits, (-1, num_classes - 1, 4))
    return cls_logits, reg_logits
def fastrcnn_head(feature, num_classes):
    """
    Fast R-CNN outputs on a globally pooled feature (old `W_init` API).

    Args:
        feature (NxCx7x7): RoI feature map, pooled to NxC before the FCs.
        num_classes (int): num_category + 1.

    Returns:
        cls_logits (N x num_classes),
        reg_logits (N x (num_classes - 1) x 4) — no box regression for the
        background class.
    """
    pooled = GlobalAvgPooling('gap', feature, data_format='NCHW')
    cls_logits = FullyConnected(
        'class', pooled, num_classes,
        W_init=tf.random_normal_initializer(stddev=0.01))
    reg_logits = FullyConnected(
        'box', pooled, (num_classes - 1) * 4,
        W_init=tf.random_normal_initializer(stddev=0.001))
    reg_logits = tf.reshape(reg_logits, (-1, num_classes - 1, 4))
    return cls_logits, reg_logits
def vgg_gap(image, option):
    """
    VGG-16 backbone with a GAP classification head and optional gating ops
    inserted after selected layers.

    Args:
        image: input image tensor (channels-first).
        option: config object; `option.gating_position[idx]` toggles the
            gating op after the layer with that index,
            `option.number_of_class` sets the logit count.

    Returns:
        (logits, convmaps): classification logits and the final 1024-channel
        conv feature map.

    Fix vs. the previous version: the `gating_position[6]` branch computed
    `gating_op(l, option)` from the stale pre-'new' tensor, silently
    discarding the 'new' conv output; it now gates `convmaps` itself.
    """
    def maybe_gate(idx, t):
        # Apply the gating op at position `idx` when enabled in the config.
        return gating_op(t, option) if option.gating_position[idx] else t

    with argscope(Conv2D, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = maybe_gate(11, convnormrelu(image, 'conv1_1', 64))
        l = maybe_gate(12, convnormrelu(l, 'conv1_2', 64))
        l = maybe_gate(1, MaxPooling('pool1', l, 2))
        l = maybe_gate(21, convnormrelu(l, 'conv2_1', 128))
        l = maybe_gate(22, convnormrelu(l, 'conv2_2', 128))
        l = maybe_gate(2, MaxPooling('pool2', l, 2))
        l = maybe_gate(31, convnormrelu(l, 'conv3_1', 256))
        l = maybe_gate(32, convnormrelu(l, 'conv3_2', 256))
        l = maybe_gate(33, convnormrelu(l, 'conv3_3', 256))
        l = maybe_gate(3, MaxPooling('pool3', l, 2))
        l = maybe_gate(41, convnormrelu(l, 'conv4_1', 512))
        l = maybe_gate(42, convnormrelu(l, 'conv4_2', 512))
        l = maybe_gate(43, convnormrelu(l, 'conv4_3', 512))
        l = maybe_gate(4, MaxPooling('pool4', l, 2))
        l = maybe_gate(51, convnormrelu(l, 'conv5_1', 512))
        l = maybe_gate(52, convnormrelu(l, 'conv5_2', 512))
        l = maybe_gate(53, convnormrelu(l, 'conv5_3', 512))
        convmaps = maybe_gate(6, convnormrelu(l, 'new', 1024))
        p_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear', p_logits, option.number_of_class,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits, convmaps