    def __init__(self, input_tensors, output_tensors,
                 return_input=False, sess=None):
        """
        Args:
            input_tensors (list): list of names.
            output_tensors (list): list of names.
            return_input (bool): same as :attr:`PredictorBase.return_input`.
            sess (tf.Session): the session this predictor runs in. If None,
                will use the default session at the first call.
        """
        self.return_input = return_input
        self.input_tensors = input_tensors
        self.output_tensors = output_tensors
        self.sess = sess

        self._use_callable = get_tf_version_number() >= 1.2
        if self._use_callable:
            if sess is not None:
                self._callable = sess.make_callable(
                    fetches=output_tensors,
                    feed_list=input_tensors,
                    accept_options=self.ACCEPT_OPTIONS)
            else:
                self._callable = None
        else:
            log_once(
                "TF>=1.2 is recommended for better performance of predictor!", 'warn')
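    # --- illustrative sketch, not part of the original source ---
    # How the two code paths above would typically be exercised when the
    # predictor is called. `_do_call_sketch` and `dp` (a list of input values)
    # are assumed names; the real call method may differ.
    def _do_call_sketch(self, dp):
        if self._use_callable:
            if self._callable is None:
                # lazily build the callable with the session at the first call
                self._callable = self.sess.make_callable(
                    fetches=self.output_tensors,
                    feed_list=self.input_tensors,
                    accept_options=self.ACCEPT_OPTIONS)
            return self._callable(*dp)
        else:
            # fallback for TF < 1.2: a plain feed_dict + sess.run
            feed = dict(zip(self.input_tensors, dp))
            return self.sess.run(self.output_tensors, feed_dict=feed)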
def get_model():
    if config.MODE_FPN:
        if get_tf_version_number() < 1.6:
            logger.warn("FPN may crash in TF<1.6 due to a TF issue.")
        return ResNetFPNModel()
    else:
        return ResNetC4Model()
def monkeypatch_tf_layers():
    if get_tf_version_number() < 1.4:
        if not hasattr(tf.layers, 'Dense'):
            from tensorflow.python.layers.core import Dense
            tf.layers.Dense = Dense

            from tensorflow.python.layers.normalization import BatchNormalization
            tf.layers.BatchNormalization = BatchNormalization

            from tensorflow.python.layers.convolutional import Conv2DTranspose, Conv2D
            tf.layers.Conv2DTranspose = Conv2DTranspose
            tf.layers.Conv2D = Conv2D

            from tensorflow.python.layers.pooling import MaxPooling2D, AveragePooling2D
            tf.layers.MaxPooling2D = MaxPooling2D
            tf.layers.AveragePooling2D = AveragePooling2D
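# --- illustrative usage, an assumption rather than original source ---
# Call the patch once at import time. On TF < 1.4 the object-oriented classes
# such as tf.layers.Dense are not exported publicly (only the functional
# tf.layers.dense is), so the patch re-exports them from the internal
# tensorflow.python.layers modules.
monkeypatch_tf_layers()
fc_layer = tf.layers.Dense(units=128, activation=tf.nn.relu)  # now available on TF 1.2/1.3 as well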
def Conv3D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1):
    """
    A wrapper around `tf.layers.Conv3D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if split == 1:
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv3D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias
    else:
        # Group conv implementation.
        # Note: this branch was carried over from the 2D group-conv code and
        # still relies on the 2D helpers (shape2d/shape4d/tf.nn.conv2d).
        data_format = get_data_format(data_format, tfmode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv3D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv now!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == (1, 1, 1) or get_tf_version_number() >= 1.5, \
            'TF>=1.5 required for group dilated conv'

        kernel_shape = shape2d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_number() >= 1.5:
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable('W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        inputs = tf.split(inputs, split, channel_axis)
        kernels = tf.split(W, split, 3)
        outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                   for i, k in zip(inputs, kernels)]
        conv = tf.concat(outputs, channel_axis)
        if activation is None:
            activation = tf.identity
        ret = activation(tf.nn.bias_add(conv, b, data_format=data_format)
                         if use_bias else conv, name='output')
        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
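# --- illustrative usage, an assumption rather than original source ---
# A plain 3x3x3 convolution on a 5-D NDHWC tensor via the split == 1 path.
# If Conv3D is additionally wrapped by tensorpack's @layer_register (not shown
# above), a variable-scope name would be passed as the first argument.
video = tf.placeholder(tf.float32, [None, 8, 32, 32, 16])   # N, D, H, W, C
out = Conv3D(video, filters=32, kernel_size=(3, 3, 3))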
        self._eval()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='load a model for evaluation or training. Can overwrite BACKBONE.WEIGHTS')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation on COCO. "
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
    parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py",
                        nargs='+')

    if get_tf_version_number() < 1.6:
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN training if you're unlucky.")

    args = parser.parse_args()
    cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    if args.visualize or args.evaluate or args.predict:
        assert args.load
        finalize_configs(is_training=False)

        if args.predict or args.visualize:
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
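    # Example invocations, assembled from the flags above (the script name,
    # paths and config values are illustrative assumptions, not from the source):
    #   ./train.py --config MODE_MASK=True MODE_FPN=True --load /path/to/weights --logdir train_log/maskrcnn
    #   ./train.py --evaluate out.json --load /path/to/weights
    #   ./train.py --predict input.jpg --load /path/to/weights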
def BatchNorm3d(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
                center=True, scale=True,
                beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                virtual_batch_size=None,
                data_format='channels_last',
                internal_update=False,
                sync_statistics=None):
    """
    Almost equivalent to `tf.layers.batch_normalization`, but different (and more powerful)
    in the following:

    1. Accepts an alternative `data_format` option when `axis` is None. For 2D input, this argument will be ignored.
    2. Default value for `momentum` and `epsilon` is different.
    3. Default value for `training` is automatically obtained from tensorpack's `TowerContext`, but can be overwritten.
    4. Support the `internal_update` option, which enables the use of BatchNorm layer inside conditionals.
    5. Support the `sync_statistics` option, which is very useful in small-batch models.

    Args:
        internal_update (bool): if False, add EMA update ops to
            `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer by control dependencies.
            They are very similar in speed, but `internal_update=True` can be used
            when you have conditionals in your model, or when you have multiple networks to train.
            Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699
        sync_statistics: either None or "nccl". By default (None), it uses statistics of the input tensor to normalize.
            When set to "nccl", this layer must be used under tensorpack multi-gpu trainers,
            and it then uses per-machine (multiple GPU) statistics to normalize.

            Note that this implementation averages the per-tower E[x] and E[x^2] among towers to compute
            global mean & variance. The result is the global mean & variance only if each tower has the same batch size.

            This option has no effect when not training.

            This option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240.
            Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/18222

    Variable Names:

    * ``beta``: the bias term. Will be zero-inited by default.
    * ``gamma``: the scale term. Will be one-inited by default.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.

    Note:
        Combinations of ``training`` and ``ctx.is_training``:

        * ``training == ctx.is_training``: standard BN, EMA are maintained during
          training and used during inference. This is the default.
        * ``training and not ctx.is_training``: still use batch statistics in inference.
        * ``not training and ctx.is_training``: use EMA to normalize in training.
          This is useful when you load a pre-trained BN and don't want to fine tune
          the EMA. EMA will not be updated in this case.
""" # parse shapes data_format = get_data_format(data_format, tfmode=False) shape = inputs.get_shape().as_list() ndims = len(shape) # in 3d conv, we have 5d dim [batch, c, d, h, w] # assert ndims in [2, 4], ndims if sync_statistics is not None: sync_statistics = sync_statistics.lower() assert sync_statistics in [None, 'nccl', 'horovod'], sync_statistics if axis is None: if ndims == 2: data_format = 'NHWC' axis = 1 elif ndims == 5: axis = 1 if data_format == 'NCHW' else 4 else: axis = 1 if data_format == 'NCHW' else 3 else: data_format = 'NCHW' if axis == 1 else 'NHWC' num_chan = shape[axis] # parse training/ctx ctx = get_current_tower_context() if training is None: training = ctx.is_training training = bool(training) TF_version = get_tf_version_number() if not training and ctx.is_training: assert TF_version >= 1.4, \ "Fine tuning a BatchNorm model with fixed statistics is only " \ "supported after https://github.com/tensorflow/tensorflow/pull/12580 " if ctx.is_main_training_tower: # only warn in first tower logger.warn( "[BatchNorm] Using moving_mean/moving_variance in training.") # Using moving_mean/moving_variance in training, which means we # loaded a pre-trained BN and only fine-tuning the affine part. if sync_statistics is None or not (training and ctx.is_training): coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS]) with rename_get_variable({ 'moving_mean': 'mean/EMA', 'moving_variance': 'variance/EMA' }): tf_args = dict(axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, fused=True, _reuse=tf.get_variable_scope().reuse) if TF_version >= 1.5: tf_args['virtual_batch_size'] = virtual_batch_size else: assert virtual_batch_size is None, "Feature not supported in this version of TF!" layer = tf.layers.BatchNormalization(**tf_args) xn = layer.apply(inputs, training=training, scope=tf.get_variable_scope()) # maintain EMA only on one GPU is OK, even in replicated mode. # because during training, EMA isn't used if ctx.is_main_training_tower: for v in layer.non_trainable_variables: add_model_variable(v) if not ctx.is_main_training_tower or internal_update: restore_collection(coll_bk) if training and internal_update: assert layer.updates with tf.control_dependencies(layer.updates): ret = tf.identity(xn, name='output') else: ret = tf.identity(xn, name='output') vh = ret.variables = VariableHolder( moving_mean=layer.moving_mean, mean=layer.moving_mean, # for backward-compatibility moving_variance=layer.moving_variance, variance=layer.moving_variance) # for backward-compatibility if scale: vh.gamma = layer.gamma if center: vh.beta = layer.beta else: red_axis = [0] if ndims == 2 else ( [0, 2, 3] if axis == 1 else [0, 1, 2]) if ndims == 5: red_axis = [0, 2, 3, 4] if axis == 1 else [0, 1, 2, 3] new_shape = None # don't need to reshape unless ... if ndims == 4 and axis == 1: new_shape = [1, num_chan, 1, 1] if ndims == 5 and axis == 1: new_shape = [1, num_chan, 1, 1, 1] batch_mean = tf.reduce_mean(inputs, axis=red_axis) batch_mean_square = tf.reduce_mean(tf.square(inputs), axis=red_axis) if sync_statistics == 'nccl': if six.PY3 and TF_version <= 1.8 and ctx.is_main_training_tower: logger.warn( "A TensorFlow bug will cause cross-GPU BatchNorm to fail. 
" "Apply this patch: https://github.com/tensorflow/tensorflow/pull/20360" ) from tensorflow.contrib.nccl.ops import gen_nccl_ops shared_name = re.sub('tower[0-9]+/', '', tf.get_variable_scope().name) num_dev = ctx.total batch_mean = gen_nccl_ops.nccl_all_reduce( input=batch_mean, reduction='sum', num_devices=num_dev, shared_name=shared_name + '_NCCL_mean') * (1.0 / num_dev) batch_mean_square = gen_nccl_ops.nccl_all_reduce( input=batch_mean_square, reduction='sum', num_devices=num_dev, shared_name=shared_name + '_NCCL_mean_square') * (1.0 / num_dev) elif sync_statistics == 'horovod': # Require https://github.com/uber/horovod/pull/331 # Proof-of-concept, not ready yet. import horovod.tensorflow as hvd batch_mean = hvd.allreduce(batch_mean, average=True) batch_mean_square = hvd.allreduce(batch_mean_square, average=True) batch_var = batch_mean_square - tf.square(batch_mean) batch_mean_vec = batch_mean batch_var_vec = batch_var beta, gamma, moving_mean, moving_var = get_bn_variables( num_chan, scale, center, beta_initializer, gamma_initializer) if new_shape is not None: batch_mean = tf.reshape(batch_mean, new_shape) batch_var = tf.reshape(batch_var, new_shape) # Using fused_batch_norm(is_training=False) is actually slightly faster, # but hopefully this call will be JITed in the future. xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, tf.reshape(beta, new_shape), tf.reshape(gamma, new_shape), epsilon) else: xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, epsilon) if ctx.is_main_training_tower: ret = update_bn_ema(xn, batch_mean_vec, batch_var_vec, moving_mean, moving_var, momentum, internal_update) else: ret = tf.identity(xn, name='output') vh = ret.variables = VariableHolder( moving_mean=moving_mean, mean=moving_mean, # for backward-compatibility moving_variance=moving_var, variance=moving_var) # for backward-compatibility if scale: vh.gamma = gamma if center: vh.beta = beta return ret
    def build_graph(self, *inputs):
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)     # 1CHW

        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)

        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]     # h,w
        pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d,
            config.TRAIN_PRE_NMS_TOPK if is_training else config.TEST_PRE_NMS_TOPK,
            config.TRAIN_POST_NMS_TOPK if is_training else config.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        # which was fixed in TF 1.6
        def ff_true():
            feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
            feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
                'fastrcnn', feature_gap, config.NUM_CLASS)
            # Return C5 feature to be shared with mask branch
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

        if get_tf_version_number() >= 1.6:
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true()
        else:
            logger.warn("This example may drop support for TF < 1.6 soon.")
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes,
                matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14

                matched_gt_masks = tf.gather(gt_masks, fg_inds_wrt_gt)   # nfg x H x W
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(matched_gt_masks, 1),
                    fg_sampled_boxes,
                    tf.range(tf.size(fg_inds_wrt_gt)), 14,
                    pad_border=False)   # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4), name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss, wd_cost], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
                    feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_upXconv_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)   # #result x #cat x 14x14
                    indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                final_masks = tf.cond(tf.size(final_labels) > 0, f1, lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')
def BatchNorm_SplitGPU(x, use_local_stat=None, decay=0.9, epsilon=1e-5,
                       use_scale=True, use_bias=True,
                       gamma_init=tf.constant_initializer(1.0), data_format='NHWC',
                       internal_update=False, split_num=1):
    """
    """
    print(split_num)
    if data_format == 'channels_last':
        data_format = 'NHWC'
    assert data_format == 'NHWC'
    shape = x.get_shape().as_list()
    ndims = len(shape)
    assert ndims in [2, 4]
    if ndims == 2:
        data_format = 'NHWC'
    if data_format == 'NCHW':
        n_out = shape[1]
    else:
        n_out = shape[-1]  # channel
    assert n_out is not None, "Input to BatchNorm cannot have unknown channels!"
    beta, gamma, moving_mean, moving_var = get_bn_variables(n_out, use_scale, use_bias, gamma_init)

    ctx = get_current_tower_context()
    if use_local_stat is None:
        use_local_stat = ctx.is_training
    use_local_stat = bool(use_local_stat)

    if use_local_stat:
        if ndims == 2:
            x = tf.reshape(x, [-1, 1, 1, n_out])    # fused_bn only takes 4D input
            # fused_bn has error using NCHW? (see #190)

        inputs = tf.concat(tf.split(x, split_num, 0), -1)   # N/S_n x H x W x C*S_n
        beta_, gamma_ = None, None
        beta_ = tf.reshape([beta] * split_num, [-1])
        gamma_ = tf.reshape([gamma] * split_num, [-1])
        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
            inputs, gamma_, beta_, epsilon=epsilon,
            is_training=True, data_format=data_format)
        xn = tf.concat(tf.split(xn, split_num, 3), 0)

        # Unfused alternative implementation (commented out):
        # inputs = tf.concat(tf.split(x, split_num, 0), -1)  # N/split_num x H x W x C*split_num
        # axis = [0, 1, 2]
        # batch_mean, batch_var = tf.nn.moments(inputs, axis)  # C*split_num
        # beta_, gamma_ = None, None
        # beta_ = tf.reshape([beta]*split_num, [-1])
        # gamma_ = tf.reshape([gamma]*split_num, [-1])
        # xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta_, gamma_, epsilon)
        # xn = tf.concat(tf.split(xn, split_num, 3), 0)

        if ndims == 2:
            xn = tf.squeeze(xn, [1, 2])
    else:
        if ctx.is_training:
            assert get_tf_version_number() >= 1.4, \
                "Fine tuning a BatchNorm model with fixed statistics is only " \
                "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
            if ctx.is_main_training_tower:  # only warn in first tower
                logger.warn("[BatchNorm] Using moving_mean/moving_variance in training.")
            # Using moving_mean/moving_variance in training, which means we
            # loaded a pre-trained BN and only fine-tuning the affine part.
            xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
                x, gamma, beta,
                mean=moving_mean, variance=moving_var, epsilon=epsilon,
                data_format=data_format, is_training=False)
        else:
            if ndims == 4 and data_format == 'NCHW':
                [g, b, mm, mv] = [reshape_for_bn(_, ndims, n_out, data_format)
                                  for _ in [gamma, beta, moving_mean, moving_var]]
                xn = tf.nn.batch_normalization(x, mm, mv, b, g, epsilon)
                batch_mean = tf.concat([moving_mean] * split_num, 0)
                batch_var = tf.concat([moving_var] * split_num, 0)
            else:
                # avoid the reshape if possible (when channel is the last dimension)
                xn = tf.nn.batch_normalization(
                    x, moving_mean, moving_var, beta, gamma, epsilon)
                batch_mean = tf.concat([moving_mean] * split_num, 0)
                batch_var = tf.concat([moving_var] * split_num, 0)

    # maintain EMA only on one GPU is OK, even in replicated mode.
    # because training time doesn't use EMA
    if ctx.is_main_training_tower:
        add_model_variable(moving_mean)
        add_model_variable(moving_var)

    if ctx.is_main_training_tower and use_local_stat:
        # print(xn)
        ret = update_bn_ema(xn, batch_mean[:n_out], batch_var[:n_out],
                            moving_mean, moving_var, decay, internal_update)
    else:
        ret = tf.identity(xn, name='output')

    vh = ret.variables = VariableHolder(mean=moving_mean, variance=moving_var)
    if use_scale:
        vh.gamma = gamma
    if use_bias:
        vh.beta = beta
    assert batch_mean is not None, 'batch_mean output is None'
    return ret
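# --- illustrative usage, an assumption rather than original source ---
# split_num=4 splits the batch into 4 groups along N and normalizes each group
# with its own statistics (a smaller "virtual" batch per group). The input must
# be NHWC per the assert above, the batch size divisible by split_num, and the
# call must happen inside a tensorpack tower function so a TowerContext exists.
x = tf.placeholder(tf.float32, [64, 32, 32, 64])   # N, H, W, C
y = BatchNorm_SplitGPU(x, split_num=4)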
    def build_graph(self, *inputs):
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)     # 1CHW

        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)

        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]     # h,w
        pred_boxes_decoded = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            image_shape2d,
            config.TRAIN_PRE_NMS_TOPK if is_training else config.TEST_PRE_NMS_TOPK,
            config.TRAIN_POST_NMS_TOPK if is_training else config.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        # which was fixed in TF 1.6
        def ff_true():
            feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
            feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs('fastrcnn', feature_gap, config.NUM_CLASS)
            # Return C5 feature to be shared with mask branch
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7, 7]), tf.zeros([0, ncls]), tf.zeros([0, ncls - 1, 4])

        if get_tf_version_number() >= 1.6:
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true()
        else:
            logger.warn("This example may drop support for TF < 1.6 soon.")
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(
                anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes,
                matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    fg_sampled_boxes,
                    fg_inds_wrt_gt, 14,
                    pad_border=False)   # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4), name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                mrcnn_loss, wd_cost], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    roi_resized = roi_align(featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
                    feature_maskrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_upXconv_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS, 0)   # #result x #cat x 14x14
                    indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                final_masks = tf.cond(tf.size(final_labels) > 0, f1, lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')
        self._eval()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='load a model for evaluation or training')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
    parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py",
                        nargs='+')
    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation on COCO. "
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")

    if get_tf_version_number() < 1.6:
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN training if you're unlucky.")

    args = parser.parse_args()
    cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    if args.visualize or args.evaluate or args.predict:
        assert args.load
        finalize_configs(is_training=False)

        if args.predict or args.visualize:
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
              center=True, scale=True,
              beta_initializer=tf.zeros_initializer(),
              gamma_initializer=tf.ones_initializer(),
              virtual_batch_size=None,
              internal_update=False):
    """
    Mostly equivalent to `tf.layers.batch_normalization`, but different in
    the following:

    1. Accepts `data_format` when `axis` is None. For 2D input, this argument will be ignored.
    2. Default value for `momentum` and `epsilon` is different.
    3. Default value for `training` is automatically obtained from `TowerContext`.
    4. Support the `internal_update` option.

    Args:
        internal_update (bool): if False, add EMA update ops to
            `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer
            by control dependencies.

    Variable Names:

    * ``beta``: the bias term. Will be zero-inited by default.
    * ``gamma``: the scale term. Will be one-inited by default. Input will be transformed by ``x * gamma + beta``.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.

    Note:
        1. About multi-GPU training: moving averages across GPUs are not aggregated.
           Batch statistics are computed independently. This is consistent with most frameworks.
        2. Combinations of ``training`` and ``ctx.is_training``:

            * ``training == ctx.is_training``: standard BN, EMA are maintained during
              training and used during inference. This is the default.
            * ``training and not ctx.is_training``: still use batch statistics in inference.
            * ``not training and ctx.is_training``: use EMA to normalize in training.
              This is useful when you load a pre-trained BN and don't want to fine tune
              the EMA. EMA will not be updated in this case.
    """
    # parse shapes
    shape = inputs.get_shape().as_list()
    ndims = len(shape)
    assert axis is not None

    # parse training/ctx
    ctx = get_current_tower_context()
    if training is None:
        training = ctx.is_training
    training = bool(training)
    TF_version = get_tf_version_number()
    if not training and ctx.is_training:
        assert TF_version >= 1.4, \
            "Fine tuning a BatchNorm model with fixed statistics is only " \
            "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
        if ctx.is_main_training_tower:  # only warn in first tower
            logger.warn("[BatchNorm] Using moving_mean/moving_variance in training.")
        # Using moving_mean/moving_variance in training, which means we
        # loaded a pre-trained BN and only fine-tuning the affine part.

    coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS])
    with rename_get_variable({
            'moving_mean': 'mean/EMA',
            'moving_variance': 'variance/EMA'}):
        if TF_version >= 1.5:
            layer = tf.layers.BatchNormalization(
                axis=axis,
                momentum=momentum, epsilon=epsilon,
                center=center, scale=scale,
                beta_initializer=beta_initializer,
                gamma_initializer=gamma_initializer,
                virtual_batch_size=virtual_batch_size,
                fused=True,
                _reuse=tf.get_variable_scope().reuse)
        else:
            assert virtual_batch_size is None, "Feature not supported in this version of TF!"
            layer = tf.layers.BatchNormalization(
                axis=axis,
                momentum=momentum, epsilon=epsilon,
                center=center, scale=scale,
                beta_initializer=beta_initializer,
                gamma_initializer=gamma_initializer,
                fused=True,
                _reuse=tf.get_variable_scope().reuse)
        xn = layer.apply(inputs, training=training, scope=tf.get_variable_scope())

    # maintain EMA only on one GPU is OK, even in replicated mode.
    # because training time doesn't use EMA
    if ctx.is_main_training_tower:
        for v in layer.non_trainable_variables:
            add_model_variable(v)
    if not ctx.is_main_training_tower or internal_update:
        restore_collection(coll_bk)

    if training and internal_update:
        assert layer.updates
        with tf.control_dependencies(layer.updates):
            ret = tf.identity(xn, name='output')
    else:
        ret = tf.identity(xn, name='output')

    vh = ret.variables = VariableHolder(
        moving_mean=layer.moving_mean,
        mean=layer.moving_mean,  # for backward-compatibility
        moving_variance=layer.moving_variance,
        variance=layer.moving_variance)  # for backward-compatibility
    if scale:
        vh.gamma = layer.gamma
    if center:
        vh.beta = layer.beta
    return ret
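# --- illustrative sketch, an assumption rather than original source ---
# Why internal_update matters: with internal_update=False the EMA update ops go
# to tf.GraphKeys.UPDATE_OPS, which does not play well with a layer created
# inside a tf.cond branch; internal_update=True attaches the update to the
# layer output via control dependencies so it only runs when the branch runs.
# `features` and `use_aux_branch` are placeholder names for this sketch, and a
# TowerContext is assumed to be active (tensorpack tower function).
normed = tf.cond(
    use_aux_branch,
    lambda: BatchNorm(features, axis=1, internal_update=True),
    lambda: features)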
    def test(self):
        if get_tf_version_number() < 1.4:
            return True  # requires leaky_relu
        self.assertSurvive(self.script, args=None)