def _setup_graph(self):
    """
    Create the predictor(s) and the sharded evaluation dataflow(s).

    With the 'replicated' trainer, ``self.predictors`` and ``self.dataflows``
    each hold ``self.num_predictor`` entries, one per predictor thread.
    With any other trainer a single ``self.predictor`` is built and
    ``self.dataflow`` is sharded by ``hvd.rank()`` / ``hvd.size()``; the
    batched dataflow is used when ``self.batched`` is set.
    """
    gpu_count = cfg.TRAIN.NUM_GPUS

    if cfg.TRAINER != 'replicated':
        # Eval on all ranks and use gather
        self.predictor = self._build_predictor(0)
        shard_spec = dict(shard=hvd.rank(), num_shards=hvd.size())
        if self.batched:
            self.dataflow = get_batched_eval_dataflow(
                self._eval_dataset, batch_size=self.batch_size, **shard_spec)
        else:
            self.dataflow = get_eval_dataflow(self._eval_dataset, **shard_spec)
        return

    # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
    tf_is_buggy = get_tf_version_tuple() in [(1, 11), (1, 12)]
    # Use two predictor threads per GPU to get better throughput
    self.num_predictor = gpu_count if tf_is_buggy else gpu_count * 2

    # Predictors are assigned to GPUs round-robin.
    predictors = []
    for idx in range(self.num_predictor):
        predictors.append(self._build_predictor(idx % gpu_count))
    self.predictors = predictors

    # One dataflow shard per predictor.
    dataflows = []
    for idx in range(self.num_predictor):
        dataflows.append(
            get_eval_dataflow(self._eval_dataset, shard=idx, num_shards=self.num_predictor))
    self.dataflows = dataflows
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask head: `num_convs` 3x3 convolutions (each optionally followed by
    GroupNorm), one stride-2 transposed convolution, and a final 1x1
    convolution with `num_category` output channels.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int): number of channels of the output logits
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    use_group_norm = norm is not None

    # c2's MSRAFill is fan_out
    init_distribution = 'untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal'
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution=init_distribution)

    net = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
            if use_group_norm:
                net = GroupNorm('gn{}'.format(idx), net)
        # stride-2 transposed conv doubles the spatial size
        net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        net = Conv2D('conv', net, num_category, 1)
    return net
def f(X):
    """
    Score-threshold the candidates, run NMS on the survivors, and return
    the result as a boolean mask over the original inputs.

    Args (packed as the pair ``X = (prob, box)``):
        prob: n probabilities
        box: nx4 boxes

    Returns:
        n boolean, the selection
    """
    prob, box = X
    # Length of the un-filtered input; the mask is expressed in these coordinates.
    full_shape = tf.shape(prob, out_type=tf.int64)

    # filter by score threshold
    kept_ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
    kept_prob = tf.gather(prob, kept_ids)
    kept_box = tf.gather(box, kept_ids)

    # NMS within each class
    nms_keep = tf.image.non_max_suppression(
        kept_box, kept_prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
    # translate NMS indices (into the filtered set) back to original indices
    picked = tf.gather(kept_ids, nms_keep)

    if get_tf_version_tuple() < (1, 13):
        # this function is deprecated by TF
        ordered = -tf.nn.top_k(-picked, k=tf.size(picked))[0]
        return tf.sparse_to_dense(
            sparse_indices=ordered,
            output_shape=full_shape,
            sparse_values=True,
            default_value=False)

    ordered = tf.sort(picked, direction='ASCENDING')
    sparse_mask = tf.sparse.SparseTensor(
        indices=tf.expand_dims(ordered, 1),
        values=tf.ones_like(ordered, dtype=tf.bool),
        dense_shape=full_shape)
    return tf.sparse.to_dense(sparse_mask, default_value=False)
def mpusim_fully_connected(
        inputs,
        units,
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        activations_datatype_size_byte=1,
        weights_datatype_size_byte=1,
        results_datatype_size_byte=4,
        systolic_array_height=256,
        systolic_array_width=256,
        activation_fifo_depth=8,
        accumulator_array_height=4096,
        log_file_output_dir='.',
        model_name='unnamed'):
    """
    A wrapper around `mpusim_fc`. The input is flattened with
    `batch_flatten` before the layer is applied.

    One difference to maintain backward-compatibility:
    Default weight initializer is variance_scaling_initializer(2.0).

    All simulator-related arguments (datatype sizes, systolic array
    dimensions, FIFO depth, accumulator height, log directory and model
    name) are forwarded unchanged to `mpusim_fc`.

    Variable Names:

    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() > (1, 12):
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
        else:
            # deprecated
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)

    layer_config = dict(
        units=units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        activations_datatype_size_byte=activations_datatype_size_byte,
        weights_datatype_size_byte=weights_datatype_size_byte,
        results_datatype_size_byte=results_datatype_size_byte,
        systolic_array_height=systolic_array_height,
        systolic_array_width=systolic_array_width,
        activation_fifo_depth=activation_fifo_depth,
        accumulator_array_height=accumulator_array_height,
        log_file_output_dir=log_file_output_dir,
        model_name=model_name)

    flat_inputs = batch_flatten(inputs)
    # The layer creates variables called 'kernel'/'bias'; expose them as 'W'/'b'.
    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = mpusim_fc(_reuse=tf.get_variable_scope().reuse, **layer_config)
        applied = layer.apply(flat_inputs, scope=tf.get_variable_scope())
        output = tf.identity(applied, name='output')

    output.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        output.variables.b = layer.bias
    return output
def roi_func_extra(boxes, already_aligned_features=None):
    """
    Concatenate the extra RoI-aligned features with per-box aligned features
    along axis 1 and reduce them to 256 channels with a 1x1 convolution.

    Reads `features`, `roi_aligned_extra_features` and `scope` from the
    enclosing scope.

    Args:
        boxes: boxes passed to `multilevel_roi_align`; only used when
            `already_aligned_features` is None.
        already_aligned_features: precomputed aligned features to use in
            place of running RoI align (for hard example mining).

    Returns:
        Output of the 'conv_reduce' convolution.
    """
    if already_aligned_features is not None:
        # for hard example mining
        per_box_features = already_aligned_features
    else:
        per_box_features = multilevel_roi_align(features[:4], boxes, 7)

    # Repeat the extra features once per box so both tensors share axis 0.
    box_count = tf.shape(per_box_features)[0]
    repeated_extra = tf.tile(roi_aligned_extra_features, [box_count, 1, 1, 1])
    merged = tf.concat((repeated_extra, per_box_features), axis=1)

    init_distribution = 'untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal'
    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution=init_distribution)

    with argscope(Conv2D, data_format='channels_first', kernel_initializer=conv_init):
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            return Conv2D('conv_reduce', merged, 256, 1, activation=None)
def _setup_graph(self):
    """
    Create the predictor(s), evaluation dataflow(s) and, for non-replicated
    trainers, the Horovod barrier op.

    With the 'replicated' trainer, ``self.predictors`` / ``self.dataflows``
    hold one entry per predictor thread. Otherwise a predictor and dataflow
    are built only on workers where ``hvd.rank() == hvd.local_rank()``,
    while ``self.barrier`` (an allreduce op) is created on every worker.
    """
    gpu_count = cfg.TRAIN.NUM_GPUS

    if cfg.TRAINER == 'replicated':
        # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
        tf_is_buggy = get_tf_version_tuple() in [(1, 11), (1, 12)]
        # Use two predictor threads per GPU to get better throughput
        threads_per_gpu = 1 if tf_is_buggy else 2
        self.num_predictor = gpu_count * threads_per_gpu

        built_predictors = []
        for thread_id in range(self.num_predictor):
            built_predictors.append(self._build_predictor(thread_id % gpu_count))
        self.predictors = built_predictors

        built_dataflows = []
        for thread_id in range(self.num_predictor):
            built_dataflows.append(get_eval_dataflow(
                self._eval_dataset, shard=thread_id, num_shards=self.num_predictor))
        self.dataflows = built_dataflows
        return

    # Only eval on the first machine,
    # Because evaluation assumes that all horovod workers share the filesystem.
    # Alternatively, can eval on all ranks and use allgather, but allgather sometimes hangs
    self._horovod_run_eval = hvd.rank() == hvd.local_rank()
    if self._horovod_run_eval:
        self.predictor = self._build_predictor(0)
        self.dataflow = get_eval_dataflow(
            self._eval_dataset,
            shard=hvd.local_rank(),
            num_shards=hvd.local_size())

    # Created on every worker, whether or not it runs evaluation.
    self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
def conv(x, filters, kernel, strides=1, name=None):
    """
    Apply a bias-free, 'same'-padded Conv2D layer to `x`.

    The kernel uses a fan-out variance-scaling initializer (scale 2.0) and
    an L2 regularizer with weight 5e-5.

    Args:
        x: input tensor.
        filters: number of output filters.
        kernel: kernel size.
        strides: convolution strides.
        name: layer name.

    Returns:
        The layer's output for `x`.
    """
    init_distribution = 'untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal'
    kernel_init = tf.keras.initializers.VarianceScaling(
        scale=2.0, mode='fan_out', distribution=init_distribution)
    weight_decay = tf.keras.regularizers.l2(5e-5)

    layer = Conv2D(
        filters,
        kernel,
        name=name,
        strides=strides,
        use_bias=False,
        padding='same',
        kernel_initializer=kernel_init,
        kernel_regularizer=weight_decay)
    return layer(x)
def f(X):
    """
    Select detections by score thresholding followed by NMS, and return the
    selection as a boolean mask over the original (un-filtered) inputs.

    Args (packed as the pair ``X = (prob, box)``):
        prob: n probabilities
        box: nx4 boxes

    Returns:
        n boolean, the selection
    """
    prob, box = X
    # Record the un-filtered length n *before* `prob` is narrowed below: the
    # selected indices refer to the original inputs, so the dense mask must
    # have that length. int64 matches the index dtype produced by tf.where.
    output_shape = tf.shape(prob, out_type=tf.int64)

    # filter by score threshold
    ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
    prob = tf.gather(prob, ids)
    box = tf.gather(box, ids)
    # NMS within each class
    selection = tf.image.non_max_suppression(
        box, prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
    # Map NMS indices (into the filtered set) back to indices into the
    # original inputs. Kept as int64 because SparseTensor indices must be int64.
    selection = tf.gather(ids, selection)

    # Ascending sort via top_k on the negated values.
    # sort available in TF>1.4.0
    # sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
    sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]

    if get_tf_version_tuple() >= (1, 12):
        mask = tf.sparse.SparseTensor(
            # SparseTensor expects an [N, ndims] index matrix, not a vector.
            indices=tf.expand_dims(sorted_selection, 1),
            values=tf.ones_like(sorted_selection, dtype=tf.bool),
            dense_shape=output_shape)
        mask = tf.sparse.to_dense(mask, default_value=False)
    else:
        # deprecated by TF
        mask = tf.sparse_to_dense(
            sparse_indices=sorted_selection,
            output_shape=output_shape,
            sparse_values=True,
            default_value=False)
    return mask
def maskrcnn_upXconv_head(feature, num_category, seed_gen, num_convs, norm=None, fp16=False):
    """
    Mask head with explicit seeding and optional float16 execution:
    `num_convs` 3x3 convolutions (each optionally followed by GroupNorm),
    one stride-2 transposed convolution, and a final 1x1 convolution.

    Args:
        feature: roi feature maps, Num_boxes x NumChannel x H_roi x W_roi,
        num_category(int): Number of total classes
        seed_gen: object whose ``next()`` is called once for the shared kernel
            initializer and once for every conv / deconv layer, in that order
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'
        fp16 (bool): if True, the input is cast to float16, GroupNorm runs in
            float32, and the result is cast back to float32

    Returns:
        mask_logits: Num_boxes x num_category x (2 * H_roi) x (2 * W_roi)
    """
    assert norm in [None, 'GN'], norm

    def _cast_if_fp16(tensor, dtype):
        # No op is added to the graph unless fp16 mode is on.
        return tf.cast(tensor, dtype) if fp16 else tensor

    net = _cast_if_fp16(feature, tf.float16)

    with mixed_precision_scope(mixed=fp16):
        # c2's MSRAFill is fan_out
        kernel_init = tf.variance_scaling_initializer(
            scale=2.0,
            mode='fan_out',
            seed=seed_gen.next(),
            distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal')
        with argscope([Conv2D, Conv2DTranspose],
                      data_format='channels_first',
                      kernel_initializer=kernel_init):
            for idx in range(num_convs):
                net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3,
                             activation=tf.nn.relu, seed=seed_gen.next())
                if norm is not None:
                    # GroupNorm is evaluated in float32, then cast back
                    net = _cast_if_fp16(net, tf.float32)
                    net = GroupNorm('gn{}'.format(idx), net)
                    net = _cast_if_fp16(net, tf.float16)
            # 2x upsampling
            net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2, strides=2,
                                  activation=tf.nn.relu, seed=seed_gen.next())
            net = Conv2D('conv', net, num_category, 1, seed=seed_gen.next())

    return _cast_if_fp16(net, tf.float32)
def Dropout(x, *args, **kwargs):
    """
    Same as `tf.layers.dropout`.
    However, for historical reasons, the first positional argument is
    interpreted as keep_prob rather than drop_prob.
    Explicitly use `rate=` keyword arguments to ensure things are consistent.

    Args:
        x: input tensor.
        *args: at most the first positional value is used; it is read as the
            probability to keep.
        **kwargs: `keep_prob` or `rate` select the dropout strength (default
            rate 0.5). `is_training` is accepted as an alias of `training`;
            when neither is given, the current tower context decides.
            On TF <= 1.12 all remaining keywords go to `tf.layers.dropout`.
            On newer TF, `noise_shape`, `seed` and `name` are forwarded to
            `tf.nn.dropout`.

    Returns:
        The output tensor of the dropout op.
    """
    if 'is_training' in kwargs:
        kwargs['training'] = kwargs.pop('is_training')

    if len(args) > 0:
        if args[0] != 0.5:
            logger.warn(
                "The first positional argument to tensorpack.Dropout is the probability to keep, rather than to drop. "
                "This is different from the rate argument in tf.layers.Dropout due to historical reasons. "
                "To mimic tf.layers.Dropout, explicitly use keyword argument 'rate' instead"
            )
        rate = 1 - args[0]
    elif 'keep_prob' in kwargs:
        assert 'rate' not in kwargs, "Cannot set both keep_prob and rate!"
        rate = 1 - kwargs.pop('keep_prob')
    elif 'rate' in kwargs:
        rate = kwargs.pop('rate')
    else:
        rate = 0.5

    if kwargs.get('training', None) is None:
        kwargs['training'] = get_current_tower_context().is_training

    if get_tf_version_tuple() <= (1, 12):
        return tf.layers.dropout(x, rate=rate, **kwargs)

    # tf.nn.dropout has no `training` argument, so inference is expressed as
    # rate 0. The options it does share with tf.layers.dropout are forwarded
    # rather than silently discarded.
    forwarded = {key: kwargs[key] for key in ('noise_shape', 'seed', 'name') if key in kwargs}
    return tf.nn.dropout(x, rate=rate if kwargs['training'] else 0., **forwarded)
def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    """
    Compute the classification loss and the box regression loss, and register
    moving summaries for the losses and several label metrics.

    The label loss is the mean sparse softmax cross-entropy. The box loss is
    the sum of absolute differences between `fg_boxes` and the selected box
    logits, divided by the number of labels.

    Args:
        labels: n,
        label_logits: nxC
        fg_boxes: nfgx4, encoded
        fg_box_logits: nfgxCx4 or nfgx1x4 if class agnostic

    Returns:
        label_loss, box_loss
    """
    per_sample_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=label_logits)
    label_loss = tf.reduce_mean(per_sample_xent, name='label_loss')

    # Entries with a positive label are treated as foreground.
    fg_inds = tf.where(labels > 0)[:, 0]
    fg_labels = tf.gather(labels, fg_inds)
    num_fg = tf.size(fg_inds, out_type=tf.int64)
    empty_fg = tf.equal(num_fg, 0)

    # Class-specific regression: keep only the box predicted for each sample's own label.
    class_specific = int(fg_box_logits.shape[1]) > 1
    if class_specific and get_tf_version_tuple() >= (1, 14):
        fg_labels = tf.expand_dims(fg_labels, axis=1)  # nfg x 1
        fg_box_logits = tf.gather(fg_box_logits, fg_labels, batch_dims=1)
    elif class_specific:
        pair_index = tf.stack([tf.range(num_fg), fg_labels], axis=1)  # nfgx2
        fg_box_logits = tf.gather_nd(fg_box_logits, pair_index)
    fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])  # nfg x 4

    with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
        prediction = tf.argmax(label_logits, axis=1, name='label_prediction')
        # boolean/integer gather is unavailable on GPU
        correct = tf.cast(tf.equal(prediction, labels), tf.float32)
        accuracy = tf.reduce_mean(correct, name='accuracy')
        fg_label_pred = tf.argmax(tf.gather(label_logits, fg_inds), axis=1)
        # foreground samples predicted as class 0
        num_zero = tf.reduce_sum(
            tf.cast(tf.equal(fg_label_pred, 0), tf.int64), name='num_zero')
        # Both ratios fall back to 0 when there is no foreground sample.
        false_negative = tf.where(
            empty_fg,
            0.,
            tf.cast(tf.truediv(num_zero, num_fg), tf.float32),
            name='false_negative')
        fg_accuracy = tf.where(
            empty_fg,
            0.,
            tf.reduce_mean(tf.gather(correct, fg_inds)),
            name='fg_accuracy')

    abs_error_sum = tf.reduce_sum(tf.abs(fg_boxes - fg_box_logits))
    box_loss = tf.truediv(
        abs_error_sum, tf.cast(tf.shape(labels)[0], tf.float32), name='box_loss')

    add_moving_summary(
        label_loss,
        box_loss,
        accuracy,
        fg_accuracy,
        false_negative,
        tf.cast(num_fg, tf.float32, name='num_fg_label'))
    return [label_loss, box_loss]
def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    """
    Mean sigmoid cross-entropy between the target masks and the logits of
    each sample's own category; also emits an image summary and moving
    summaries for accuracy metrics.

    Args:
        mask_logits: 4-D logits.
            NOTE(review): this docstring previously gave the layout as
            #fg x #category x h x w, but the code first transposes with
            perm [0, 3, 1, 2] and then indexes axis 1 by label, which
            suggests the input is #fg x h x w x #category -- confirm
            against the caller.
        fg_labels: #fg, in 1~#class, int64
        fg_target_masks: #fg x h x w, float32

    Returns:
        The scalar loss tensor named 'maskrcnn_loss'.
    """
    # Move the last axis to position 1 so the category can be indexed there.
    logits = tf.transpose(mask_logits, [0, 3, 1, 2])

    # Labels start at 1, hence the `- 1` when indexing the category axis.
    if get_tf_version_tuple() >= (1, 14):
        logits = tf.gather(logits, tf.reshape(fg_labels - 1, [-1, 1]), batch_dims=1)
        logits = tf.squeeze(logits, axis=1)
    else:
        pair_index = tf.stack(
            [tf.range(tf.size(fg_labels, out_type=tf.int64)), fg_labels - 1],
            axis=1)  # #fgx2
        logits = tf.gather_nd(logits, pair_index)  # #fg x h x w
    mask_probs = tf.sigmoid(logits)

    # add some training visualizations to tensorboard
    with tf.name_scope('mask_viz'):
        # ground truth and prediction are joined along axis 1
        viz = tf.concat([fg_target_masks, mask_probs], axis=1)
        viz = tf.expand_dims(viz, 3)
        viz = tf.cast(viz * 255, tf.uint8, name='viz')
        tf.summary.image('mask_truth|pred', viz, max_outputs=10)

    per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=fg_target_masks, logits=logits)
    loss = tf.reduce_mean(per_pixel_loss, name='maskrcnn_loss')

    pred_label = mask_probs > 0.5
    truth_label = fg_target_masks > 0.5
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(pred_label, truth_label), tf.float32),
        name='accuracy')
    # pixels that are foreground in the target and predicted correctly
    correct_positive = tf.logical_and(
        tf.equal(pred_label, truth_label),
        tf.equal(truth_label, True))
    pos_accuracy = tf.reduce_mean(
        tf.cast(correct_positive, tf.float32), name='pos_accuracy')
    fg_pixel_ratio = tf.reduce_mean(
        tf.cast(truth_label, tf.float32), name='fg_pixel_ratio')

    add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
    return loss
def monkeypatch_tf_layers():
    """
    On TF < 1.4 where `tf.layers` has no `Dense` attribute, attach the layer
    classes from `tensorflow.python.layers` to `tf.layers`.

    Does nothing on newer TF, or when `tf.layers.Dense` already exists.
    """
    if get_tf_version_tuple() >= (1, 4):
        return
    if hasattr(tf.layers, 'Dense'):
        return

    from tensorflow.python.layers.core import Dense
    tf.layers.Dense = Dense

    from tensorflow.python.layers.normalization import BatchNormalization
    tf.layers.BatchNormalization = BatchNormalization

    from tensorflow.python.layers.convolutional import Conv2DTranspose, Conv2D
    tf.layers.Conv2DTranspose = Conv2DTranspose
    tf.layers.Conv2D = Conv2D

    from tensorflow.python.layers.pooling import MaxPooling2D, AveragePooling2D
    tf.layers.MaxPooling2D = MaxPooling2D
    tf.layers.AveragePooling2D = AveragePooling2D
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Box head made of `num_convs` 3x3 convolutions (each optionally followed
    by GroupNorm) and one fully-connected layer.

    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, "GN"], norm
    apply_group_norm = norm is not None

    init_distribution = "untruncated_normal" if get_tf_version_tuple() >= (1, 12) else "normal"
    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode="fan_out", distribution=init_distribution)

    net = feature
    with argscope(Conv2D, data_format="channels_first", kernel_initializer=conv_init):
        for idx in range(num_convs):
            net = Conv2D("conv{}".format(idx), net, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                         activation=tf.nn.relu)
            if apply_group_norm:
                net = GroupNorm("gn{}".format(idx), net)
        # The FC layer uses its own default variance-scaling initializer.
        net = FullyConnected(
            "fc",
            net,
            cfg.FPN.FRCNN_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return net
def boxclass_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Box-class head made of `num_convs` 3x3 convolutions (each optionally
    followed by GroupNorm) and one fully-connected layer.

    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm

    if get_tf_version_tuple() >= (1, 12):
        init_distribution = 'untruncated_normal'
    else:
        init_distribution = 'normal'
    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution=init_distribution)

    hidden = feature
    with argscope(Conv2D, data_format='channels_first', kernel_initializer=conv_init):
        for layer_idx in range(num_convs):
            hidden = Conv2D(
                'conv{}'.format(layer_idx),
                hidden,
                cfg.FPN.BOXCLASS_CONV_HEAD_DIM,
                3,
                activation=tf.nn.relu)
            if norm is not None:
                hidden = GroupNorm('gn{}'.format(layer_idx), hidden)
        # The FC layer uses its own default variance-scaling initializer.
        hidden = FullyConnected(
            'fc',
            hidden,
            cfg.FPN.BOXCLASS_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return hidden
def test(self):
    """Check that `self.script` survives; the check is skipped on TF < 1.4."""
    tf_new_enough = get_tf_version_tuple() >= (1, 4)
    if not tf_new_enough:
        # requires leaky_relu
        return True
    self.assertSurvive(self.script, args=None)
def mpusim_conv2d(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        activations_datatype_size_byte=1,
        weights_datatype_size_byte=1,
        results_datatype_size_byte=4,
        systolic_array_height=256,
        systolic_array_width=256,
        activation_fifo_depth=8,
        accumulator_array_height=4096,
        log_file_output_dir='.',
        model_name='unnamed'):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.

    The convolution itself is performed by `mpu_sim_conv2d_lib.mpu_sim_conv2d`,
    which receives the simulator arguments (datatype sizes, systolic array
    dimensions, FIFO depth, accumulator height, log directory, model name)
    unchanged. Kernel, bias and activity regularizers are not supported and
    must be None.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    # group conv implementation
    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[mpusim_conv2d] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    # Floor division keeps the dimension an int under Python 3; it is exact
    # because `in_channel % split == 0` is asserted above.
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(strides, data_format=data_format)

    kwargs = dict(data_format=data_format)
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

    W = tf.get_variable(
        'W', filter_shape, initializer=kernel_initializer)

    if use_bias:
        b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

    if split == 1:
        conv = mpu_sim_conv2d_lib.mpu_sim_conv2d(
            inputs,
            W,
            activations_datatype_size_byte,
            weights_datatype_size_byte,
            results_datatype_size_byte,
            systolic_array_height,
            systolic_array_width,
            activation_fifo_depth,
            accumulator_array_height,
            log_file_output_dir,
            model_name,
            stride,
            padding.upper(),
            **kwargs)
    else:
        # Group convolution: convolve each channel group with its own slice
        # of the kernel, then join the results along the channel axis.
        inputs = tf.split(inputs, split, channel_axis)
        kernels = tf.split(W, split, 3)
        outputs = [
            mpu_sim_conv2d_lib.mpu_sim_conv2d(
                input_block,
                kernel_block,
                activations_datatype_size_byte,
                weights_datatype_size_byte,
                results_datatype_size_byte,
                systolic_array_height,
                systolic_array_width,
                activation_fifo_depth,
                accumulator_array_height,
                log_file_output_dir,
                model_name,
                stride,
                padding.upper(),
                **kwargs)
            for input_block, kernel_block in zip(inputs, kernels)]
        conv = tf.concat(outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
self._eval() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--load', help='load a model for evaluation or training. Can overwrite BACKBONE.WEIGHTS') parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn') parser.add_argument('--visualize', action='store_true', help='visualize intermediate results') parser.add_argument('--evaluate', help="Run evaluation on COCO. " "This argument is the path to the output json evaluation file") parser.add_argument('--predict', help="Run prediction on a given image. " "This argument is the path to the input image file") parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') if get_tf_version_tuple() < (1, 6): # https://github.com/tensorflow/tensorflow/issues/14657 logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.") args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() if args.visualize or args.evaluate or args.predict: assert args.load finalize_configs(is_training=False) if args.predict or args.visualize: cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
def MaskedConv2D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        masking=False):
    """
    A wrapper around `tf.layers.Conv2D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.

    When `masking` is true, the kernel is passed through
    `pruning.apply_mask` before the convolution; this is only supported
    with `split == 1`. Masked, grouped and dilated convolutions take the
    manual `tf.nn.conv2d` path, which does not support regularizers.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if not masking and split == 1 and dilation_rate == [1, 1]:
        # tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797)
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                _reuse=tf.get_variable_scope().reuse)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias
    else:
        if masking:
            assert split == 1, "Pruining group conv is not supported yet"

        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        # Floor division keeps the dimension an int under Python 3; it is
        # exact because `in_channel % split == 0` is asserted above.
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        if split == 1:
            if masking:
                W = pruning.apply_mask(W)
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            conv = None
            if get_tf_version_tuple() >= (1, 13):
                # Try the native group convolution first; TF raises
                # ValueError when the build does not support it.
                try:
                    conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
                except ValueError:
                    log_once("CUDNN group convolution support is only available with "
                             "https://github.com/tensorflow/tensorflow/pull/25818 . "
                             "Will fall back to a loop-based slow implementation instead!", 'warn')
            if conv is None:
                # Fallback: convolve each channel group separately and concatenate.
                inputs = tf.split(inputs, split, channel_axis)
                kernels = tf.split(W, split, 3)
                outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                           for i, k in zip(inputs, kernels)]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
tpviz.interactive_imshow(viz) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--load', help='load a model for evaluation or training. Can overwrite BACKBONE.WEIGHTS') parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn') parser.add_argument('--visualize', action='store_true', help='visualize intermediate results') parser.add_argument('--evaluate', help="Run evaluation. " "This argument is the path to the output json evaluation file") parser.add_argument('--predict', help="Run prediction on a given image. " "This argument is the path to the input image file") parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') if get_tf_version_tuple() < (1, 6): # https://github.com/tensorflow/tensorflow/issues/14657 logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.") args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() DetectionDataset() # initialize the config with information from our dataset if args.visualize or args.evaluate or args.predict: assert tf.test.is_gpu_available() assert args.load finalize_configs(is_training=False)
def Conv3D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1):
    """
    A wrapper around `tf.layers.Conv3D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.

    With `split == 1` the layer is built with `tf.layers.Conv3D`. Otherwise
    the input channels and the kernel are split into `split` groups that are
    convolved separately with `tf.nn.conv3d` and concatenated; regularizers
    are not supported on that path.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if split == 1:
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv3D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                # NOTE(review): the `data_format` argument is not forwarded
                # on this path; 'channels_last' is always used. Confirm
                # whether that is intentional before changing it.
                data_format='channels_last',
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        # group conv implementation
        data_format = get_data_format3d(data_format, tfmode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 4 if data_format == 'NDHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv3D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv now!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == (1, 1, 1) or get_tf_version_tuple() >= (
            1, 5), 'TF>=1.5 required for group dilated conv'

        kernel_shape = shape3d(kernel_size)
        # Floor division keeps the dimension an int under Python 3; it is
        # exact because `in_channel % split == 0` is asserted above.
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape5d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            # tf.nn.conv3d takes 5-element dilations, built the same way as
            # the 5-element strides above.
            kwargs['dilations'] = shape5d(dilation_rate, data_format=data_format)

        W = tf.get_variable('W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        # Split the input along its channel axis and the kernel along its
        # last (output-channel) axis, one slice per group.
        inputs = tf.split(inputs, split, channel_axis)
        kernels = tf.split(W, split, 4)

        outputs = [
            tf.nn.conv3d(i, k, stride, padding.upper(), **kwargs)
            for i, k in zip(inputs, kernels)
        ]
        conv = tf.concat(outputs, channel_axis)
        if activation is None:
            activation = tf.identity
        ret = activation(
            tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv,
            name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
def generate_fpn_proposals(multilevel_anchor_boxes,
                           multilevel_box_logits,
                           multilevel_label_logits,
                           orig_image_dims,
                           batch_size):
    """
    Generating the rois from the box logits and pick K with top label scores as
    the box proposals.

    Args:
        multilevel_anchor_boxes: #lvl anchor tensors; each is reshaped to (-1, 4)
        multilevel_box_logits:   #lvl [ BS x (NA * 4) x H_feature x W_feature ] boxes
        multilevel_label_logits: #lvl [ BS x H_feature x W_feature x NA ] tensors
        orig_image_dims: Original (prepadding) image dimensions (h,w,c)   BS x 3
        batch_size (int): multiplies the per-level top-k budget during training

    Returns:
        boxes: K x 5 float
        scores:  1-D, K (logits)

    Raises:
        RuntimeError: if cfg.FPN.PROPOSAL_MODE is not 'Level'.
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    # Only (h, w) is used; the channel column is dropped.
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK * batch_size if training \
            else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK

        # name change in tf 1.15 -- resolved once, it does not depend on the level
        if get_tf_version_tuple() == (1, 15):
            generate_bounding_box_proposals = tf.generate_bounding_box_proposals_v2
        else:
            generate_bounding_box_proposals = tf.generate_bounding_box_proposals

        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                im_info = tf.cast(orig_images_hw, tf.float32)

                scores = multilevel_label_logits[lvl]  # BS x H_feature x W_featurex NA
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl],
                    [0, 2, 3, 1])  # BS x H_feature x W_feature x (NA * 4)

                single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                single_level_anchor_boxes = tf.reshape(
                    single_level_anchor_boxes, (-1, 4))

                # This is a custom tensorflow op that translates the bbox deltas into bounding box coordinates
                # and then runs NMS. See CODEBASE.md for more info
                #
                # roi: (# boxes for a single level) x 5, the 5 colunms arranged as: batch_index, x_1, y_1, x_2, y_2
                # rois_probs: 1-D, # boxes for a single level
                rois, rois_probs = generate_bounding_box_proposals(
                    scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)

                all_boxes.append(rois)
                all_scores.append(rois_probs)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # Num_all_rois x 5
        proposal_boxes = tf.reshape(proposal_boxes, [-1, 5])  # Num_all_rois x 5

        proposal_scores = tf.concat(all_scores, axis=0)  # 1-D Num_all_rois
        proposal_scores = tf.reshape(proposal_scores, [-1])  # 1-D Num_all_rois

        # Keep the best-scoring proposals across all levels (unsorted).
        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)  # K x 5
    else:
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
parser.add_argument('--visualize', action='store_true', help='visualize intermediate results') parser.add_argument( '--evaluate', help="Run evaluation. " "This argument is the path to the output json evaluation file") parser.add_argument('--predict', help="Run prediction on a given image. " "This argument is the path to the input image file") parser.add_argument( '--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') if get_tf_version_tuple() < (1, 6): # https://github.com/tensorflow/tensorflow/issues/14657 logger.warn( "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky." ) args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() DetectionDataset( ) # initialize the config with information from our dataset if args.visualize or args.evaluate or args.predict: if not tf.test.is_gpu_available():
def generate_fpn_proposals_topk_per_image(multilevel_anchor_boxes,
                                          multilevel_box_logits,
                                          multilevel_label_logits,
                                          orig_image_dims, batch_size):
    """
    Generate RPN proposals for a batch, applying the top-k cut per image.

    For each processed image, proposals are produced level by level with the
    ``generate_bounding_box_proposals`` op, concatenated over all FPN levels
    and reduced to at most ``fpn_nms_topk`` entries. The per-image results are
    then concatenated along axis 0.

    Args:
        multilevel_anchor_boxes:    #lvl anchor tensors; each one is reshaped
            to (-1, 4) before being passed to the proposal op.
        multilevel_box_logits:      #lvl [ BS x (NAx4) x H x W ] boxes
        multilevel_label_logits:    #lvl [ BS x H x W x A ] tensors
        orig_image_dims: Original (prepadding) image dimensions (h,w,c)   BS x 3
        batch_size (int): number of images to process when the current tower
            is training. Outside training only image 0 is processed.

    Returns:
        boxes: K x 5 float, concatenated over the processed images. Column 0
            of every roi has the image index added to it.
        scores: 1-D vector with one score per returned box    (logits)

    Raises:
        RuntimeError: if ``cfg.FPN.PROPOSAL_MODE`` is not ``'Level'``.
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    if cfg.FPN.PROPOSAL_MODE != 'Level':
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    fpn_nms_topk = (cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK
                    if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK)

    # The op is exposed under a different name in TF 1.15. The choice does not
    # depend on the image or the level, so resolve it once up front.
    if get_tf_version_tuple() == (1, 15):
        generate_bounding_box_proposals = tf.generate_bounding_box_proposals_v2
    else:
        generate_bounding_box_proposals = tf.generate_bounding_box_proposals

    boxes_list = []
    scores_list = []

    bs = batch_size if training else 1
    for i in range(bs):
        all_boxes = []
        all_scores = []
        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                im_info = tf.cast(orig_images_hw[i:(i + 1)],
                                  tf.float32)  # h, w

                scores = multilevel_label_logits[lvl][i:(i + 1)]
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl][i:(i + 1)], [0, 2, 3, 1])

                single_level_anchor_boxes = tf.reshape(
                    multilevel_anchor_boxes[lvl], (-1, 4))

                # https://caffe2.ai/docs/operators-catalogue.html#generateproposals
                rois, rois_probs = generate_bounding_box_proposals(
                    scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)

                # Only a single image is fed to the op, so shift the first
                # roi column by the image index ``i``.
                all_boxes.append(
                    tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1))
                all_scores.append(rois_probs)

        # Merge all levels of image ``i`` and flatten.
        proposal_boxes = tf.reshape(tf.concat(all_boxes, axis=0), [-1, 5])
        proposal_scores = tf.reshape(tf.concat(all_scores, axis=0), [-1])

        # Keep at most fpn_nms_topk proposals for this image.
        topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        topk_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                k=topk,
                                                sorted=False)
        boxes_list.append(tf.gather(proposal_boxes, topk_indices))
        # top_k already returns the values selected by topk_indices.
        scores_list.append(topk_scores)

    boxes = tf.concat(boxes_list, axis=0)
    scores = tf.concat(scores_list, axis=0)

    return tf.stop_gradient(boxes, name='boxes'), \
        tf.stop_gradient(scores, name='scores')
def test(self):
    """Check that ``self.script`` survives a run with no extra arguments.

    On TensorFlow older than 1.4 nothing is run and ``True`` is returned,
    because the script requires leaky_relu.
    """
    if get_tf_version_tuple() >= (1, 4):
        self.assertSurvive(self.script, args=None)
        return None
    return True
def BatchNorm3d(inputs,
                axis=None,
                training=None,
                momentum=0.9,
                epsilon=1e-5,
                center=True,
                scale=True,
                beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                virtual_batch_size=None,
                data_format='channels_last',
                internal_update=False,
                sync_statistics=None):
    """
    Almost equivalent to `tf.layers.batch_normalization`, but different (and more powerful)
    in the following:

    1. Accepts an alternative `data_format` option when `axis` is None. For 2D input, this argument will be ignored.
       5D inputs (3d conv, [batch, c, d, h, w] in channels-first layout) are handled as well.
    2. Default value for `momentum` and `epsilon` is different.
    3. Default value for `training` is automatically obtained from tensorpack's `TowerContext`, but can be overwritten.
    4. Support the `internal_update` option, which enables the use of BatchNorm layer inside conditionals.
    5. Support the `sync_statistics` option, which is very useful in small-batch models.

    Args:
        internal_update (bool): if False, add EMA update ops to
          `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer by control dependencies.
          They are very similar in speed, but `internal_update=True` can be used
          when you have conditionals in your model, or when you have multiple networks to train.
          Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699
        sync_statistics: either None, "nccl" or "horovod" (case-insensitive).
          By default (None), it uses statistics of the input tensor to normalize.
          When set to "nccl", this layer must be used under tensorpack multi-gpu trainers,
          and it then uses per-machine (multiple GPU) statistics to normalize.
          "horovod" averages the statistics with `hvd.allreduce`; the code marks it
          as a proof-of-concept that is not ready yet.

          Note that this implementation averages the per-tower E[x] and E[x^2] among towers to compute
          global mean&variance. The result is the global mean&variance only if each tower has the same batch size.

          This option has no effect when not training.
          This option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240.
          Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/18222

    Variable Names:

    * ``beta``: the bias term. Will be zero-inited by default.
    * ``gamma``: the scale term. Will be one-inited by default.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.

    Note:
        Combinations of ``training`` and ``ctx.is_training``:

        * ``training == ctx.is_training``: standard BN, EMA are maintained during training
          and used during inference. This is the default.
        * ``training and not ctx.is_training``: still use batch statistics in inference.
        * ``not training and ctx.is_training``: use EMA to normalize in training.
          This is useful when you load a pre-trained BN and don't want to fine tune the EMA. EMA will not be updated in
          this case.
    """
    # parse shapes
    data_format = get_data_format(data_format, tfmode=False)
    shape = inputs.get_shape().as_list()
    ndims = len(shape)
    # in 3d conv, we have 5d dim [batch, c, d, h, w]
    if sync_statistics is not None:
        sync_statistics = sync_statistics.lower()
    assert sync_statistics in [None, 'nccl', 'horovod'], sync_statistics

    if axis is None:
        if ndims == 2:
            data_format = 'NHWC'
            axis = 1
        elif ndims == 5:
            axis = 1 if data_format == 'NCHW' else 4
        else:
            axis = 1 if data_format == 'NCHW' else 3
    else:
        data_format = 'NCHW' if axis == 1 else 'NHWC'
    num_chan = shape[axis]

    # parse training/ctx
    ctx = get_current_tower_context()
    if training is None:
        training = ctx.is_training
    training = bool(training)

    # Compare versions as (major, minor) tuples. Formatting them into a float
    # maps 1.10 ... 1.15 onto 1.1 ... 1.15, which makes those releases look
    # older than 1.4 / 1.5 and "not newer than 1.8".
    tf_version = get_tf_version_tuple()
    if not training and ctx.is_training:
        assert tf_version >= (1, 4), \
            "Fine tuning a BatchNorm model with fixed statistics is only " \
            "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
        if ctx.is_main_training_tower:  # only warn in first tower
            logger.warn(
                "[BatchNorm] Using moving_mean/moving_variance in training.")
        # Using moving_mean/moving_variance in training, which means we
        # loaded a pre-trained BN and only fine-tuning the affine part.

    if sync_statistics is None or not (training and ctx.is_training):
        # Plain (non-synchronized) path: delegate to tf.layers.
        coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS])
        with rename_get_variable({
                'moving_mean': 'mean/EMA',
                'moving_variance': 'variance/EMA'
        }):
            tf_args = dict(axis=axis,
                           momentum=momentum,
                           epsilon=epsilon,
                           center=center,
                           scale=scale,
                           beta_initializer=beta_initializer,
                           gamma_initializer=gamma_initializer,
                           fused=True,
                           _reuse=tf.get_variable_scope().reuse)
            if tf_version >= (1, 5):
                tf_args['virtual_batch_size'] = virtual_batch_size
            else:
                assert virtual_batch_size is None, "Feature not supported in this version of TF!"
            layer = tf.layers.BatchNormalization(**tf_args)
            xn = layer.apply(inputs,
                             training=training,
                             scope=tf.get_variable_scope())

        # maintain EMA only on one GPU is OK, even in replicated mode.
        # because during training, EMA isn't used
        if ctx.is_main_training_tower:
            for v in layer.non_trainable_variables:
                add_model_variable(v)
        if not ctx.is_main_training_tower or internal_update:
            # Undo the additions the layer made to UPDATE_OPS.
            restore_collection(coll_bk)

        if training and internal_update:
            assert layer.updates
            with tf.control_dependencies(layer.updates):
                ret = tf.identity(xn, name='output')
        else:
            ret = tf.identity(xn, name='output')

        vh = ret.variables = VariableHolder(
            moving_mean=layer.moving_mean,
            mean=layer.moving_mean,  # for backward-compatibility
            moving_variance=layer.moving_variance,
            variance=layer.moving_variance)  # for backward-compatibility
        if scale:
            vh.gamma = layer.gamma
        if center:
            vh.beta = layer.beta
    else:
        # Synchronized path: compute E[x] and E[x^2] and average them
        # across towers before normalizing.
        red_axis = [0] if ndims == 2 else (
            [0, 2, 3] if axis == 1 else [0, 1, 2])
        if ndims == 5:
            red_axis = [0, 2, 3, 4] if axis == 1 else [0, 1, 2, 3]

        new_shape = None  # don't need to reshape unless ...
        if ndims == 4 and axis == 1:
            new_shape = [1, num_chan, 1, 1]
        if ndims == 5 and axis == 1:
            new_shape = [1, num_chan, 1, 1, 1]

        batch_mean = tf.reduce_mean(inputs, axis=red_axis)
        batch_mean_square = tf.reduce_mean(tf.square(inputs), axis=red_axis)

        if sync_statistics == 'nccl':
            if six.PY3 and tf_version <= (1, 8) and ctx.is_main_training_tower:
                logger.warn(
                    "A TensorFlow bug will cause cross-GPU BatchNorm to fail. "
                    "Apply this patch: https://github.com/tensorflow/tensorflow/pull/20360"
                )

            from tensorflow.contrib.nccl.ops import gen_nccl_ops
            # Strip the tower prefix so all towers use the same shared_name.
            shared_name = re.sub('tower[0-9]+/', '',
                                 tf.get_variable_scope().name)
            num_dev = ctx.total
            batch_mean = gen_nccl_ops.nccl_all_reduce(
                input=batch_mean,
                reduction='sum',
                num_devices=num_dev,
                shared_name=shared_name + '_NCCL_mean') * (1.0 / num_dev)
            batch_mean_square = gen_nccl_ops.nccl_all_reduce(
                input=batch_mean_square,
                reduction='sum',
                num_devices=num_dev,
                shared_name=shared_name +
                '_NCCL_mean_square') * (1.0 / num_dev)
        elif sync_statistics == 'horovod':
            # Require https://github.com/uber/horovod/pull/331
            # Proof-of-concept, not ready yet.
            import horovod.tensorflow as hvd
            batch_mean = hvd.allreduce(batch_mean, average=True)
            batch_mean_square = hvd.allreduce(batch_mean_square, average=True)
        batch_var = batch_mean_square - tf.square(batch_mean)
        # Keep the un-reshaped statistics for the EMA update below.
        batch_mean_vec = batch_mean
        batch_var_vec = batch_var

        beta, gamma, moving_mean, moving_var = get_bn_variables(
            num_chan, scale, center, beta_initializer, gamma_initializer)
        if new_shape is not None:
            batch_mean = tf.reshape(batch_mean, new_shape)
            batch_var = tf.reshape(batch_var, new_shape)
            # Using fused_batch_norm(is_training=False) is actually slightly faster,
            # but hopefully this call will be JITed in the future.
            xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var,
                                           tf.reshape(beta, new_shape),
                                           tf.reshape(gamma, new_shape),
                                           epsilon)
        else:
            xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var,
                                           beta, gamma, epsilon)

        if ctx.is_main_training_tower:
            ret = update_bn_ema(xn, batch_mean_vec, batch_var_vec,
                                moving_mean, moving_var, momentum,
                                internal_update)
        else:
            ret = tf.identity(xn, name='output')

        vh = ret.variables = VariableHolder(
            moving_mean=moving_mean,
            mean=moving_mean,  # for backward-compatibility
            moving_variance=moving_var,
            variance=moving_var)  # for backward-compatibility
        if scale:
            vh.gamma = gamma
        if center:
            vh.beta = beta
    return ret
def main(args):
    """Build the training configuration and launch training.

    Reads ``args.logdir`` (logger directory, also handed to the evaluation
    callbacks) and ``args.load`` (optional model to initialize from); every
    other setting comes from the global ``cfg``.
    """
    # "spawn/forkserver" is safer than the default "fork" method and
    # produce more deterministic behavior & memory saving
    # However its limitation is you cannot pass a lambda function to subprocesses.
    import multiprocessing
    multiprocessing.set_start_method('spawn')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn(
            "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky."
        )

    # ---- logging -----------------------------------------------------
    use_horovod = cfg.TRAINER == 'horovod'
    if use_horovod:
        hvd.init()
    # Only rank 0 owns the log directory under horovod.
    if not use_horovod or hvd.rank() == 0:
        logger.set_logger_dir(args.logdir, 'd')
    logger.info("Environment Information:\n" + collect_env_info())

    finalize_configs(is_training=True)

    # ---- model -------------------------------------------------------
    model = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    # ---- schedule, derived from the number of GPUs --------------------
    steps_per_epoch = cfg.TRAIN.STEPS_PER_EPOCH
    gpu_factor = 8. / cfg.TRAIN.NUM_GPUS

    # warmup is step based, lr is epoch based
    init_lr = cfg.TRAIN.WARMUP_INIT_LR * min(gpu_factor, 1.)
    warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)]
    warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / steps_per_epoch

    lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)]
    # Every boundary except the last one lowers the rate by another 10x.
    lr_schedule += [
        (steps * gpu_factor // steps_per_epoch,
         cfg.TRAIN.BASE_LR * 0.1**decay_idx)
        for decay_idx, steps in enumerate(cfg.TRAIN.LR_SCHEDULE[:-1], start=1)
    ]
    logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule))
    logger.info("LR Schedule (epochs, value): " + str(lr_schedule))

    train_dataflow = get_train_dataflow()
    # This is what's commonly referred to as "epochs"
    total_passes = cfg.TRAIN.LR_SCHEDULE[-1] * 8 / train_dataflow.size()
    logger.info(
        "Total passes of the training set is: {:.5g}".format(total_passes))

    # ---- callbacks ---------------------------------------------------
    checkpoint_saver = PeriodicCallback(
        ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
        every_k_epochs=cfg.TRAIN.CHECKPOINT_PERIOD)
    # linear warmup
    warmup_setter = ScheduledHyperParamSetter('learning_rate',
                                              warmup_schedule,
                                              interp='linear',
                                              step_based=True)
    lr_setter = ScheduledHyperParamSetter('learning_rate', lr_schedule)
    # Not enabled here: AMLCallback(), GPUUtilizationTracker()
    callbacks = [
        checkpoint_saver,
        warmup_setter,
        lr_setter,
        GPUMemoryTracker(),
        HostMemoryTracker(),
        ThroughputTracker(samples_per_step=cfg.TRAIN.NUM_GPUS),
        EstimatedTimeLeft(median=True),
        SessionRunTimeout(60000),  # 1 minute timeout
    ]

    if cfg.TRAIN.EVAL_PERIOD > 0:
        for dataset in cfg.DATA.VAL:
            callbacks.append(
                EvalCallback(dataset, *model.get_inference_tensor_names(),
                             args.logdir))

    # ---- initial weights ----------------------------------------------
    if use_horovod and hvd.rank() > 0:
        session_init = None
    elif args.load:
        # ignore mismatched values, so you can `--load` a model for fine-tuning
        session_init = SmartInit(args.load, ignore_mismatch=True)
    else:
        session_init = SmartInit(cfg.BACKBONE.WEIGHTS)

    traincfg = TrainConfig(
        model=model,
        data=QueueInput(train_dataflow),
        callbacks=callbacks,
        monitors=[AMLMonitor()],
        steps_per_epoch=steps_per_epoch,
        max_epoch=cfg.TRAIN.LR_SCHEDULE[-1] * gpu_factor // steps_per_epoch,
        session_init=session_init,
        starting_epoch=cfg.TRAIN.STARTING_EPOCH)

    # ---- trainer -----------------------------------------------------
    if use_horovod:
        trainer = HorovodTrainer(average=False)
    else:
        # nccl mode appears faster than cpu mode
        trainer = SyncMultiGPUTrainerReplicated(cfg.TRAIN.NUM_GPUS,
                                                average=False,
                                                mode='nccl')
    launch_train_with_config(traincfg, trainer)
def Conv(inputs,
         filters,
         kernel_size,
         strides=(1, 1),
         padding='same',
         data_format='channels_last',
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
         kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         split=1,
         norm=False):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    4. Support 'norm': the bias is disabled and the kernel is passed through a
       softmax taken over 4 groups of its input-channel axis before use.

    Args:
        split (int): number of groups. Must divide both the input channels
            and ``filters``.
        norm (bool): enable the kernel softmax described above. The reshape
            it uses presumably needs ``split == 1`` and an input channel
            count divisible by 4 -- TODO confirm with the callers.
        kernel_regularizer, bias_regularizer, activity_regularizer: accepted
            for signature compatibility only; they must all be None.

    Variable Names:

    * ``parseweigth``: weights, exposed as ``variables.W`` on the output
    * ``parsebias``: bias, exposed as ``variables.b`` on the output
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(
                2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    # group conv implementation
    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (
        1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(strides, data_format=data_format)

    kwargs = {"data_format": data_format}
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

    # matching input dtype (ex. tf.float16) since the default dtype of
    # variable is tf.float32
    inputs_dtype = inputs.dtype
    W = tf.get_variable('parseweigth',
                        filter_shape,
                        dtype=inputs_dtype,
                        initializer=kernel_initializer)
    if norm:
        use_bias = False
        # Softmax over axis 2, i.e. the axis of size 4 introduced by the
        # reshape, then restore the regular filter shape.
        W = tf.reshape(W, kernel_shape + [4, in_channel // 4, out_channel])
        W = tf.nn.softmax(W, 2)
        W = tf.reshape(W, filter_shape)

    if use_bias:
        b = tf.get_variable('parsebias', [out_channel],
                            dtype=inputs_dtype,
                            initializer=bias_initializer)

    if split == 1:
        conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
    else:
        try:
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        except ValueError:
            log_once(
                "CUDNN group convolution support is only available with "
                "https://github.com/tensorflow/tensorflow/pull/25818 . "
                "Will fall back to a loop-based slow implementation instead!",
                'warn')
            # Perform the fallback the message announces: convolve every
            # group separately and concatenate along the channel axis.
            # Without this, ``conv`` would be unbound below.
            input_groups = tf.split(inputs, split, channel_axis)
            kernel_groups = tf.split(W, split, 3)
            group_outputs = [
                tf.nn.conv2d(group_in, group_kernel, stride, padding.upper(),
                             **kwargs)
                for group_in, group_kernel in zip(input_groups, kernel_groups)
            ]
            conv = tf.concat(group_outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b,
                         data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
parser.add_argument('--visualize', action='store_true', help='visualize intermediate results') parser.add_argument( '--evaluate', help="Run evaluation on COCO. " "This argument is the path to the output json evaluation file") parser.add_argument('--predict', help="Run prediction on a given image. " "This argument is the path to the input image file") parser.add_argument( '--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') if get_tf_version_tuple() < (1, 6): # https://github.com/tensorflow/tensorflow/issues/14657 logger.warn( "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky." ) if get_tf_version_tuple() == (1, 11): # https://github.com/tensorflow/tensorflow/issues/22750 logger.warn("TF=1.11 has a bug which leads to crash in inference.") args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() if args.visualize or args.evaluate or args.predict:
def test(self): return True # https://github.com/tensorflow/tensorflow/issues/24517 if get_tf_version_tuple() < (1, 4): return True # requires leaky_relu self.assertSurvive(self.script, args=None)
def fastrcnn_predictions(boxes, scores):
    """
    Produce the final detections from the predictions of all proposals.

    Each class other than class 0 is filtered by score threshold and
    non-maximum suppression on its own; the survivors of all classes are then
    cut down to the ``cfg.TEST.RESULTS_PER_IM`` highest scores.

    Args:
        boxes: n x #class x 4 floatbox in float32
        scores: n x #class

    Returns:
        boxes: Kx4
        scores: K
        labels: K
    """
    assert boxes.shape[1] == cfg.DATA.NUM_CLASS
    assert scores.shape[1] == cfg.DATA.NUM_CLASS

    # Put the class axis first and drop class 0.
    boxes = tf.transpose(boxes, [1, 0, 2])[1:, :, :]  # #cat x n x 4
    scores = tf.transpose(scores[:, 1:], [1, 0])  # #cat x n

    tf_version = get_tf_version_tuple()
    has_sparse_api = tf_version >= (1, 13)

    def class_selection_mask(class_inputs):
        """
        class_inputs: (n probabilities, nx4 boxes) of a single class
        Returns: n boolean, True for every box that is kept
        """
        class_prob, class_box = class_inputs
        mask_shape = tf.shape(class_prob, out_type=tf.int64)

        # filter by score threshold
        above_thresh = tf.reshape(
            tf.where(class_prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
        kept_prob = tf.gather(class_prob, above_thresh)
        kept_box = tf.gather(class_box, above_thresh)

        # NMS within this class; map the result back to original indices
        nms_keep = tf.image.non_max_suppression(kept_box, kept_prob,
                                                cfg.TEST.RESULTS_PER_IM,
                                                cfg.TEST.FRCNN_NMS_THRESH)
        picked = tf.gather(above_thresh, nms_keep)

        if has_sparse_api:
            ordered = tf.sort(picked, direction='ASCENDING')
            sparse_mask = tf.sparse.SparseTensor(
                indices=tf.expand_dims(ordered, 1),
                values=tf.ones_like(ordered, dtype=tf.bool),
                dense_shape=mask_shape)
            return tf.sparse.to_dense(sparse_mask, default_value=False)

        # this function is deprecated by TF
        ordered = -tf.nn.top_k(-picked, k=tf.size(picked))[0]
        return tf.sparse_to_dense(sparse_indices=ordered,
                                  output_shape=mask_shape,
                                  sparse_values=True,
                                  default_value=False)

    # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
    buggy_tf = tf_version in [(1, 11), (1, 12)]
    num_parallel = 1 if buggy_tf else 10
    masks = tf.map_fn(class_selection_mask, (scores, boxes),
                      dtype=tf.bool,
                      parallel_iterations=num_parallel)  # #cat x N

    # #selection x 2, each is (cat_id, box_id)
    selected_indices = tf.where(masks)
    scores = tf.boolean_mask(scores, masks)

    # filter again by sorting scores
    num_results = tf.minimum(cfg.TEST.RESULTS_PER_IM, tf.size(scores))
    topk_scores, topk_indices = tf.nn.top_k(scores, num_results, sorted=False)
    filtered_selection = tf.gather(selected_indices, topk_indices)
    cat_ids, box_ids = tf.unstack(filtered_selection, axis=1)

    final_scores = tf.identity(topk_scores, name='scores')
    # Class 0 was dropped above, so shift the ids back by one.
    final_labels = tf.add(cat_ids, 1, name='labels')
    final_ids = tf.stack([cat_ids, box_ids], axis=1, name='all_ids')
    final_boxes = tf.gather_nd(boxes, final_ids, name='boxes')
    return final_boxes, final_scores, final_labels
#!/usr/bin/env python # -*- coding: utf-8 -*- # File: dump-model-params.py import argparse import numpy as np import os import six import tensorflow as tf from tensorpack import logger from tensorpack.tfutils import varmanip from tensorpack.tfutils.common import get_op_tensor_name, get_tf_version_tuple TF_version = get_tf_version_tuple() def _import_external_ops(message): if "horovod" in message.lower(): logger.info("Importing horovod ...") import horovod.tensorflow # noqa return if "MaxBytesInUse" in message: logger.info("Importing memory_stats ...") from tensorflow.contrib.memory_stats import MaxBytesInUse # noqa return if 'Nccl' in message: logger.info("Importing nccl ...") if TF_version <= (1, 12): try: from tensorflow.contrib.nccl.python.ops.nccl_ops import _validate_and_load_nccl_so