def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope([Conv2D], use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                  .MaxPooling('pool0', 3, strides=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000)())
    return logits
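
# The backbone above leaves `group_func` and `block_func` to the caller. Since
# LinearWrap.apply() passes the wrapped tensor as the first argument, a compatible
# pair could look like the minimal sketch below. The names `resnet_group`,
# `resnet_basicblock`, `resnet_shortcut`, and the NCHW channel axis are assumptions
# for illustration, not taken from this code.
import tensorflow as tf
from tensorpack.models import Conv2D, BatchNorm, BNReLU

def resnet_shortcut(l, n_out, stride):
    n_in = l.get_shape().as_list()[1]   # assumes NCHW data format
    if n_in != n_out or stride != 1:
        # project the identity branch when channels or spatial size change
        return Conv2D('convshortcut', l, n_out, 1, strides=stride)
    return l

def resnet_basicblock(l, ch_out, stride):
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3)
    l = BatchNorm('bn2', l)
    return tf.nn.relu(l + resnet_shortcut(shortcut, ch_out, stride))

def resnet_group(l, name, block_func, features, count, stride):
    with tf.variable_scope(name):
        for i in range(count):
            with tf.variable_scope('block{}'.format(i)):
                # only the first block of a group strides; the rest keep resolution
                l = block_func(l, features, stride if i == 0 else 1)
    return l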

def resnet_backbone(image, num_blocks, block_func):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(resnet_group, 'group0', block_func, 64, num_blocks[0], 1, first=True)
                  .apply(resnet_group, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(resnet_group, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(resnet_group, 'group3', block_func, 512, num_blocks[3], 2)
                  .BNReLU('bnlast')
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000, nl=tf.identity)())
    return logits

def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='untruncated_normal')):
        logits = (LinearWrap(image)
                  # pad manually (NCHW layout) so conv0/pool0 can use 'VALID' padding
                  .tf.pad([[0, 0], [0, 0], [3, 3], [3, 3]])
                  .Conv2D('conv0', 64, 7, strides=2, activation=NormAct, padding='VALID')
                  .tf.pad([[0, 0], [0, 0], [1, 1], [1, 1]])
                  .MaxPooling('pool0', 3, strides=2, padding='VALID')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000,
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01))())
    return logits

def resnet_backbone(image, num_blocks, group_func, group_func_dilation,
                    block_func, block_func_dilation):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # with freeze_variables(stop_gradient=True, skip_collection=True):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                  .MaxPooling('pool0', 3, strides=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  # keep stride 1 in the last two groups and dilate instead, so the
                  # spatial resolution is preserved while the receptive field grows
                  .apply(group_func_dilation, 'group2', block_func_dilation,
                         256, num_blocks[2], 1, 2)
                  .apply(group_func_dilation, 'group3', block_func_dilation,
                         512, num_blocks[3], 1, 4))
        # 1x1 classifier head producing 21 class maps (e.g. PASCAL VOC)
        logits = logits.Conv2D('conv102', 21, 1, strides=1, activation=tf.identity)()
        # To train only the head, the trainable collection can be rebuilt by hand:
        # tf.get_default_graph().clear_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        # with tf.variable_scope('conv102', reuse=True):
        #     W = tf.get_variable('W')
        #     tf.add_to_collection(tf.GraphKeys.TRAINABLE_VARIABLES, W)
        return logits
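
# `group_func_dilation` receives an extra dilation argument. A minimal sketch of a
# matching group/block pair, reusing the `resnet_shortcut` helper sketched earlier;
# the names and the use of Conv2D's dilation_rate argument are assumptions for
# illustration, not taken from this code. Note that TF does not allow stride > 1
# together with dilation > 1, which is consistent with the stride-1 dilated groups
# in the backbone above.
def resnet_group_dilation(l, name, block_func, features, count, stride, dilation):
    with tf.variable_scope(name):
        for i in range(count):
            with tf.variable_scope('block{}'.format(i)):
                l = block_func(l, features, stride if i == 0 else 1, dilation)
    return l

def resnet_bottleneck_dilation(l, ch_out, stride, dilation):
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
    # dilated 3x3: enlarges the receptive field without further downsampling
    l = Conv2D('conv2', l, ch_out, 3, strides=stride,
               dilation_rate=dilation, activation=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1)
    l = BatchNorm('bn3', l)
    return tf.nn.relu(l + resnet_shortcut(shortcut, ch_out * 4, stride))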

def _get_logits(self, image, pose):
    with argscope([Conv2D, MaxPooling], data_format=self.data_format, padding='same'), \
            argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
            argscope([Conv2D], strides=1):
        im_fc3 = (LinearWrap(image)
                  # 1_1
                  .Conv2D('conv1_1', 64, 7)
                  .MaxPooling('pool1_1', 1)
                  # 1_2
                  .Conv2D('conv1_2', 64, 5, activation=LocalNorm)
                  # .LocalNorm('conv1_2_norm', cfg.radius, cfg.alpha, cfg.beta, cfg.bias)
                  .MaxPooling('pool1_2', 2)
                  # 2_1
                  .Conv2D('conv2_1', 64, 3)
                  .MaxPooling('pool2_1', 1)
                  # 2_2
                  .Conv2D('conv2_2', 64, 3, activation=LocalNorm)
                  # .LocalNorm('conv2_2_norm', cfg.radius, cfg.alpha, cfg.beta, cfg.bias)
                  .MaxPooling('pool2_2', 1)
                  .FullyConnected('fc3', 1024)())
        if cfg.drop_fc3:
            im_fc3 = tf.nn.dropout(im_fc3, cfg.fc3_drop_rate)
        pc1 = FullyConnected('pc1', pose, 16)
        fc4_im = FullyConnected('fc4_im', im_fc3, 1024, activation=tf.identity)
        fc4_pose = FullyConnected('fc4_pose', pc1, 1024, activation=tf.identity)
        # fuse the image and pose streams by addition before the shared nonlinearity
        fc4 = tf.nn.relu(fc4_im + fc4_pose)
        fc5 = FullyConnected('fc5', fc4, 2)
        return fc5

def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                  .MaxPooling('pool0', 3, strides=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000,
                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01))())
    """
    Sec 5.1: The 1000-way fully-connected layer is initialized by drawing
    weights from a zero-mean Gaussian with standard deviation of 0.01.
    """
    return logits

def initial_convolution(image, init_channel, s_type='basic', name='init_conv0'):
    with tf.variable_scope(name):
        if s_type == 'basic':
            l = Conv2D('conv0', image, init_channel, 3)
        elif s_type == 'imagenet':
            l = (LinearWrap(image)
                 .Conv2D('conv0', init_channel, 7, strides=2, activation=tf.identity)
                 .MaxPooling('pool0', 3, strides=2, padding='same')())
        elif s_type == 'conv7':
            l = Conv2D('conv0_7x7', image, init_channel, 7, strides=2)
        elif s_type == 'conv3':
            l = Conv2D('conv0_3x3', image, init_channel, 3, strides=2)
        else:
            raise ValueError("Unknown starting type (s_type): {}".format(s_type))
        l = BatchNorm('init_bn', l)
    return l
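
# Example usage of the stem above (the input resolution is illustrative):
#
#   image = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   l = initial_convolution(image, 64, s_type='imagenet')   # -> 56x56, 64 channels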

def resnet_backbone(image, num_blocks, group_func, block_func, output_dims):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear_C{}'.format(output_dims), output_dims,
                                  nl=tf.identity)())
    return logits

def resnet_backbone(image, num_blocks, grp_fun, blck_fun, nfeatures):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # TODO evaluate conv depth
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(grp_fun, 'group0', blck_fun, 64, num_blocks[0], 1)
                  .apply(grp_fun, 'group1', blck_fun, 128, num_blocks[1], 2)
                  .apply(grp_fun, 'group2', blck_fun, 256, num_blocks[2], 2)
                  # .apply(grp_fun, 'group3', blck_fun, 512, num_blocks[3], 2)
                  .apply(grp_fun, 'group3', blck_fun, 256, num_blocks[3], 1)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('fc0', 1000)
                  .FullyConnected('fc1', 500)
                  .FullyConnected('linear', nfeatures, nl=tf.identity)())
    return logits

def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', NUM_CLASSES, nl=tf.identity)())
    return logits

def residual_bottleneck_layer(name, l, out_filters, strides, data_format):
    data_format = get_data_format(data_format, keras_mode=False)
    ch_dim = 3 if data_format == 'NHWC' else 1
    ch_in = _get_dim(l, ch_dim)
    ch_base = out_filters
    ch_last = ch_base * 4

    l_in = l
    with tf.variable_scope('{}.0'.format(name)):
        # pre-activation: BN + ReLU before the bottleneck convolutions
        l = BatchNorm('bn0', l)
        l = tf.nn.relu(l)
        l = (LinearWrap(l)
             .Conv2D('conv1x1_0', ch_base, 1, activation=BNReLU)
             .Conv2D('conv3x3_1', ch_base, 3, strides=strides, activation=BNReLU)
             .Conv2D('conv1x1_2', ch_last, 1)())
        l = BatchNorm('bn_3', l)

        shortcut = l_in
        if ch_in != ch_last or strides != 1:
            # project the shortcut whenever channels or spatial size change
            shortcut = Conv2D('conv_short', shortcut, ch_last, 1, strides=strides)
            shortcut = BatchNorm('bn_short', shortcut)
        l = l + shortcut
    return l
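
# A group of such layers could be stacked like this minimal sketch; the name
# `bottleneck_group` and the stride-on-first-block convention are assumptions
# for illustration, not taken from this code.
def bottleneck_group(name, l, out_filters, count, strides, data_format='NHWC'):
    with tf.variable_scope(name):
        for i in range(count):
            # only the first block strides; later blocks keep the resolution
            l = residual_bottleneck_layer('block{}'.format(i), l, out_filters,
                                          strides if i == 0 else 1, data_format)
    return l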

def get_logits(self, image):
    num_blocks = [3, 4, 6, 3]
    with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                  data_format=self.data_format), \
            argscope(Conv2D, use_bias=False,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out', distribution='untruncated_normal')), \
            argscope(BatchNorm, epsilon=1.001e-5):
        logits = (LinearWrap(image)
                  .tf.pad([[0, 0], [0, 0], [3, 3], [3, 3]])
                  .Conv2D('conv0', 64, 7, strides=2, padding='VALID')
                  .apply(self.norm_func, 'conv0')
                  .tf.nn.relu()
                  .tf.pad([[0, 0], [0, 0], [1, 1], [1, 1]])
                  .MaxPooling('pool0', 3, strides=2, padding='VALID')
                  .apply(self.resnet_group, 'group0', 64, num_blocks[0], 1)
                  .apply(self.resnet_group, 'group1', 128, num_blocks[1], 2)
                  .apply(self.resnet_group, 'group2', 256, num_blocks[2], 2)
                  .apply(self.resnet_group, 'group3', 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')())
    if self.num_output is not None:
        # stack one fully-connected head per requested output size,
        # with ReLU between all but the last
        for idx, no in enumerate(self.num_output):
            logits = FullyConnected(
                'linear{}_{}'.format(idx, no), logits, no,
                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
            if idx != len(self.num_output) - 1:
                logits = tf.nn.relu(logits)
    return logits

def _build_graph(self, inputs):
    image, label = inputs
    self.batch_size = tf.shape(image)[0]
    self.image_size = tf.shape(image)[1:3]
    org_label = label

    # for the image summary, first convert the BGR input to RGB
    image_rgb = tf.reverse(image, axis=[-1])
    label_shown = tf.where(tf.equal(label, cfg.ignore_label),
                           tf.zeros_like(label), label)
    label_shown = tf.cast(label_shown * 10, tf.uint8)
    tf.summary.image('input-image', image_rgb, max_outputs=3)
    tf.summary.image('input-label', label_shown, max_outputs=3)

    image = DeeplabModel.image_preprocess(image, bgr=True)
    if self.data_format == "NCHW":
        image = tf.transpose(image, [0, 3, 1, 2])

    # the backbone part
    logits, low_level_features = self._get_logits(image)
    encoder_output = atrous_spatial_pyramid_pooling(logits)

    with tf.variable_scope('decoder'):
        with tf.variable_scope('low_level_features'):
            # low_level_features could also be fetched from the graph by name, e.g.
            # tf.get_default_graph().get_operation_by_name(
            #     'tower0/group0/block2/conv1/Relu').values()[0]
            low_level_features = Conv2D('conv_1x1', low_level_features, 48, 1,
                                        strides=1, activation=tf.nn.relu)
            low_level_features_size = tf.shape(low_level_features)[1:3]
        with tf.variable_scope('upsampling_logits'):
            net = tf.image.resize_bilinear(encoder_output, low_level_features_size,
                                           name='upsample_1')
            net = tf.concat([net, low_level_features], axis=3, name='concat')
            with argscope(Conv2D, filters=256, kernel_size=3, strides=1,
                          activation=tf.nn.relu):
                net = (LinearWrap(net)
                       .Conv2D('conv_3x3_1')
                       .Conv2D('conv_3x3_2')
                       .Conv2D('conv_1x1', filters=cfg.num_classes, kernel_size=1,
                               strides=1, activation=None)())

    # softmax cross-entropy loss over the valid (non-ignored) pixels
    logits = tf.image.resize_bilinear(net, self.image_size, align_corners=True)
    label_flatten = tf.reshape(label, shape=[-1])
    mask = tf.to_float(tf.not_equal(label_flatten, cfg.ignore_label))
    one_hot_label = tf.one_hot(label_flatten, cfg.num_classes,
                               on_value=1.0, off_value=0.0)
    loss = tf.losses.softmax_cross_entropy(
        one_hot_label, tf.reshape(logits, shape=[-1, cfg.num_classes]),
        weights=mask)

    if cfg.weight_decay > 0:
        wd_cost = regularize_cost('.*/W', l2_regularizer(cfg.weight_decay),
                                  name='l2_regularize_loss')
    else:
        wd_cost = tf.constant(0.0)
    self.cost = tf.add_n([loss, wd_cost], name='cost')

    pred = tf.argmax(tf.nn.softmax(logits), 3, name='predicts')
    pred_shown = tf.cast(tf.expand_dims(pred * 10, -1), tf.uint8)
    pred_shown = tf.where(tf.equal(label, cfg.ignore_label),
                          tf.zeros_like(pred_shown), pred_shown)
    tf.summary.image('input-preds', pred_shown, max_outputs=3)

    # compute the mean IoU, exposing named ops to read, update and reset the counts
    pred_flatten = tf.reshape(pred, shape=[-1])
    label_flatten = tf.where(tf.equal(label_flatten, cfg.ignore_label),
                             tf.zeros_like(label_flatten), label_flatten)
    label_flatten = tf.cast(label_flatten, tf.int64)
    miou, miou_update_op = tf.metrics.mean_iou(label_flatten, pred_flatten,
                                               cfg.num_classes, weights=mask,
                                               name="metric_scope")
    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="InferenceTower/metric_scope")
    # running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES)
    miou_reset_op = tf.variables_initializer(var_list=running_vars,
                                             name='miou_reset_op')
    miou = tf.identity(miou, name='miou')
    miou_update_op = tf.identity(miou_update_op, name='miou_update_op')

    add_moving_summary(loss, wd_cost, self.cost)
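
# During validation, the named mIoU ops above could be driven like this minimal
# sketch (session handling and data feeding are assumptions, not part of the
# model code):
#
#   sess.run('miou_reset_op')               # zero the running confusion counts
#   for images, labels in validation_batches:
#       sess.run('miou_update_op:0', feed_dict={...})
#   print(sess.run('miou:0'))               # mean IoU over all batches so far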

def resnet_backbone(image, num_blocks, group_func, block_func, class_num, ASPP=False):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        resnet_head = (LinearWrap(image)
                       .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                       .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                       .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1,
                              dilation=1, stride_first=False)
                       .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2,
                              dilation=1, stride_first=True)
                       .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2,
                              dilation=2, stride_first=True)
                       .apply(group_func, 'group3', block_func, 512, num_blocks[3], 1,
                              dilation=4, stride_first=False)())

    def aspp_branch(input, rate):
        # one dilated 3x3 branch predicting class scores directly
        return AtrousConv2D('aspp{}_conv'.format(rate), input, class_num,
                            kernel_shape=3, rate=rate)

    if ASPP:
        # Atrous Spatial Pyramid Pooling: sum the branches at rates 6/12/18/24
        output = (aspp_branch(resnet_head, 6) + aspp_branch(resnet_head, 12) +
                  aspp_branch(resnet_head, 18) + aspp_branch(resnet_head, 24))
    else:
        output = aspp_branch(resnet_head, 6)
    output = tf.image.resize_bilinear(output, image.shape[1:3])
    return output
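
# Example call for a 21-class segmentation head with the full pyramid enabled;
# the block counts [3, 4, 23, 3] correspond to ResNet-101 and are illustrative,
# and `my_group` / `my_block` stand in for the caller's group/block functions:
#
#   output = resnet_backbone(image, [3, 4, 23, 3], my_group, my_block,
#                            class_num=21, ASPP=True)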