Example 1
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
            buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]

            # Use two predictor threads per GPU to get better throughput
            self.num_predictor = num_gpu if buggy_tf else num_gpu * 2
            self.predictors = [
                self._build_predictor(k % num_gpu)
                for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(self._eval_dataset,
                                  shard=k,
                                  num_shards=self.num_predictor)
                for k in range(self.num_predictor)
            ]
        else:
            # Eval on all ranks and use gather
            self.predictor = self._build_predictor(0)

            if self.batched:
                self.dataflow = get_batched_eval_dataflow(
                    self._eval_dataset,
                    shard=hvd.rank(),
                    num_shards=hvd.size(),
                    batch_size=self.batch_size)
            else:
                self.dataflow = get_eval_dataflow(self._eval_dataset,
                                                  shard=hvd.rank(),
                                                  num_shards=hvd.size())
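Every example on this page gates behaviour on get_tf_version_tuple(), the tensorpack helper that returns the first two components of the TensorFlow version as an integer tuple so that guards like `>= (1, 12)` compare lexicographically. A minimal, dependency-free sketch of that check (illustrative, not the library's exact code):

import tensorflow as tf

def get_tf_version_tuple():
    # e.g. '1.12.0' -> (1, 12)
    return tuple(map(int, tf.__version__.split('.')[:2]))

# the workaround used above: TF 1.11/1.12 are hit by issue #22750, so use fewer predictor threads
buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]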
Example 2
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Args:
        feature (N x C x s x s): spatial size s is 7 in C4 models and 14 in FPN models.
        num_category (int): number of mask categories to predict
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out',
                      distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        l = Conv2D('conv', l, num_category, 1)
    return l
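The distribution argument chosen above recurs in several examples below: TF 1.12 added the 'untruncated_normal' option to variance scaling initializers, while older releases only offer the (truncated) 'normal' mode, which is why the guard is `>= (1, 12)`. The same guard in isolation, assuming plain TF 1.x:

import tensorflow as tf

tf_version = tuple(map(int, tf.__version__.split('.')[:2]))
distribution = 'untruncated_normal' if tf_version >= (1, 12) else 'normal'
# MSRA-style fan-out initialization, matching caffe2's MSRAFill as noted above
msra_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out', distribution=distribution)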
Example 3
    def f(X):
        """
        prob: n probabilities
        box: nx4 boxes

        Returns: n boolean, the selection
        """
        prob, box = X
        output_shape = tf.shape(prob, out_type=tf.int64)
        # filter by score threshold
        ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
        prob = tf.gather(prob, ids)
        box = tf.gather(box, ids)
        # NMS within each class
        selection = tf.image.non_max_suppression(
            box, prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
        selection = tf.gather(ids, selection)

        if get_tf_version_tuple() >= (1, 13):
            sorted_selection = tf.sort(selection, direction='ASCENDING')
            mask = tf.sparse.SparseTensor(indices=tf.expand_dims(sorted_selection, 1),
                                          values=tf.ones_like(sorted_selection, dtype=tf.bool),
                                          dense_shape=output_shape)
            mask = tf.sparse.to_dense(mask, default_value=False)
        else:
            # this function is deprecated by TF
            sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]
            mask = tf.sparse_to_dense(
                sparse_indices=sorted_selection,
                output_shape=output_shape,
                sparse_values=True,
                default_value=False)
        return mask
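The version branch above only changes how the kept indices are turned into a dense boolean mask. A self-contained sketch of the TF >= 1.13 path, where the helper name and the assumption that `length` is a Python int are illustrative:

import tensorflow as tf

def indices_to_mask(indices, length):
    # SparseTensor wants sorted, 2-D, int64 indices; tf.sort mirrors the >= (1, 13) branch above
    indices = tf.sort(tf.cast(indices, tf.int64), direction='ASCENDING')
    mask = tf.sparse.SparseTensor(indices=tf.expand_dims(indices, 1),
                                  values=tf.ones_like(indices, dtype=tf.bool),
                                  dense_shape=[length])
    return tf.sparse.to_dense(mask, default_value=False)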
Example 4
def mpusim_fully_connected(inputs,
                            units,
                            activation=None,
                            use_bias=True,
                            kernel_initializer=None,
                            bias_initializer=tf.zeros_initializer(),
                            kernel_regularizer=None,
                            bias_regularizer=None,
                            activity_regularizer=None,
                            activations_datatype_size_byte=1,
                            weights_datatype_size_byte=1,
                            results_datatype_size_byte=4,
                            systolic_array_height=256,
                            systolic_array_width=256,
                            activation_fifo_depth=8,
                            accumulator_array_height=4096,
                            log_file_output_dir='.',
                            model_name='unnamed'):
    """
    A wrapper around `mpusim_fc`.
    One difference to maintain backward-compatibility:
    Default weight initializer is variance_scaling_initializer(2.0).
    Variable Names:
    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')

    inputs = batch_flatten(inputs)
    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = mpusim_fc(units=units,
                            activation=activation,
                            use_bias=use_bias,
                            kernel_initializer=kernel_initializer,
                            bias_initializer=bias_initializer,
                            kernel_regularizer=kernel_regularizer,
                            bias_regularizer=bias_regularizer,
                            activity_regularizer=activity_regularizer,
                            activations_datatype_size_byte=activations_datatype_size_byte,
                            weights_datatype_size_byte=weights_datatype_size_byte,
                            results_datatype_size_byte=results_datatype_size_byte,
                            systolic_array_height=systolic_array_height,
                            systolic_array_width=systolic_array_width,
                            activation_fifo_depth=activation_fifo_depth,
                            accumulator_array_height=accumulator_array_height,
                            log_file_output_dir=log_file_output_dir,
                            model_name=model_name,
                            _reuse=tf.get_variable_scope().reuse)
        ret = layer.apply(inputs, scope=tf.get_variable_scope())
        ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=layer.kernel)
    
    if use_bias:
        ret.variables.b = layer.bias
    return ret
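A hypothetical usage sketch for the wrapper above; the input shape, unit count and systolic-array sizes are invented for illustration, and the call assumes the plain function signature shown here (a tensorpack-registered variant would take a scope name as its first argument):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 1024])
with tf.variable_scope('fc6'):
    y = mpusim_fully_connected(x, units=512, activation=tf.nn.relu,
                               systolic_array_height=128,
                               systolic_array_width=128)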
Example 5
        def roi_func_extra(boxes, already_aligned_features=None):
            if already_aligned_features is None:
                aligned_features = multilevel_roi_align(features[:4], boxes, 7)
            else:
                # for hard example mining
                aligned_features = already_aligned_features
            tiled = tf.tile(roi_aligned_extra_features,
                            [tf.shape(aligned_features)[0], 1, 1, 1])
            concat_features = tf.concat((tiled, aligned_features), axis=1)

            with argscope(Conv2D,
                          data_format='channels_first',
                          kernel_initializer=tf.variance_scaling_initializer(
                              scale=2.0,
                              mode='fan_out',
                              distribution='untruncated_normal'
                              if get_tf_version_tuple() >=
                              (1, 12) else 'normal')):
                with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
                    reduced_features = Conv2D('conv_reduce',
                                              concat_features,
                                              256,
                                              1,
                                              activation=None)
            return reduced_features
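The tf.tile call above simply repeats one shared extra-feature map across however many ROIs were aligned, so the two tensors can be concatenated along the channel axis. Reduced to toy channels-first shapes (values invented for illustration):

import tensorflow as tf

extra = tf.zeros([1, 256, 7, 7])     # one shared feature map
aligned = tf.zeros([8, 256, 7, 7])   # 8 ROI-aligned feature maps
tiled = tf.tile(extra, [tf.shape(aligned)[0], 1, 1, 1])  # -> 8 x 256 x 7 x 7
merged = tf.concat([tiled, aligned], axis=1)             # -> 8 x 512 x 7 x 7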
Example 6
    def _setup_graph(self):
        num_gpu = cfg.TRAIN.NUM_GPUS
        if cfg.TRAINER == 'replicated':
            # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
            buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]

            # Use two predictor threads per GPU to get better throughput
            self.num_predictor = num_gpu if buggy_tf else num_gpu * 2
            self.predictors = [
                self._build_predictor(k % num_gpu)
                for k in range(self.num_predictor)
            ]
            self.dataflows = [
                get_eval_dataflow(self._eval_dataset,
                                  shard=k,
                                  num_shards=self.num_predictor)
                for k in range(self.num_predictor)
            ]
        else:
            # Only eval on the first machine,
            # Because evaluation assumes that all horovod workers share the filesystem.
            # Alternatively, can eval on all ranks and use allgather, but allgather sometimes hangs
            self._horovod_run_eval = hvd.rank() == hvd.local_rank()
            if self._horovod_run_eval:
                self.predictor = self._build_predictor(0)
                self.dataflow = get_eval_dataflow(self._eval_dataset,
                                                  shard=hvd.local_rank(),
                                                  num_shards=hvd.local_size())

            self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
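The `barrier` above is nothing more than an allreduce over a throw-away tensor: evaluating it forces every Horovod rank to reach the same point before continuing. A minimal sketch of that trick (assumes Horovod is installed and initialized):

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
barrier = hvd.allreduce(tf.random_normal(shape=[1]))
# running `barrier` in a session on every rank acts as a synchronization point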
Example 7
def conv(x, filters, kernel, strides=1, name=None):
    return Conv2D(filters, kernel, name=name,
                  strides=strides, use_bias=False, padding='same',
                  kernel_initializer=tf.keras.initializers.VarianceScaling(
                      scale=2.0, mode='fan_out',
                      distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal'),
                  kernel_regularizer=tf.keras.regularizers.l2(5e-5))(x)
Example 8
    def f(X):
        """
        prob: n probabilities
        box: nx4 boxes

        Returns: n boolean, the selection
        """
        prob, box = X
        output_shape = tf.shape(prob)  # length of the original score vector, before filtering
        # filter by score threshold
        ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
        prob = tf.gather(prob, ids)
        box = tf.gather(box, ids)
        # NMS within each class
        selection = tf.image.non_max_suppression(box, prob,
                                                 cfg.TEST.RESULTS_PER_IM,
                                                 cfg.TEST.FRCNN_NMS_THRESH)
        selection = tf.to_int32(tf.gather(ids, selection))
        # sort available in TF>1.4.0
        # sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
        sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]

        if get_tf_version_tuple() >= (1, 12):
            # SparseTensor needs 2-D int64 indices and an int64 dense_shape
            mask = tf.sparse.SparseTensor(indices=tf.expand_dims(tf.cast(sorted_selection, tf.int64), 1),
                                          values=tf.ones_like(sorted_selection,
                                                              dtype=tf.bool),
                                          dense_shape=tf.cast(output_shape, tf.int64))
            mask = tf.sparse.to_dense(mask, default_value=False)
        else:
            # deprecated by TF
            mask = tf.sparse_to_dense(sparse_indices=sorted_selection,
                                      output_shape=output_shape,
                                      sparse_values=True,
                                      default_value=False)
        return mask
Example 9
def maskrcnn_upXconv_head(feature, num_category, seed_gen, num_convs, norm=None, fp16=False):
    """
    Args:
        feature: roi feature maps, Num_boxes x NumChannel x H_roi x W_roi,
        num_category(int): Number of total classes
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits: Num_boxes x num_category x (2 * H_roi) x (2 * W_roi)
    """
    assert norm in [None, 'GN'], norm
    l = feature
    if fp16:
        l = tf.cast(l, tf.float16)
    with mixed_precision_scope(mixed=fp16):
      with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', seed=seed_gen.next(),
                      distribution='untruncated_normal' if get_tf_version_tuple() >= (1, 12) else 'normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu, seed=seed_gen.next())
            if norm is not None:
                if fp16: l = tf.cast(l, tf.float32)
                l = GroupNorm('gn{}'.format(k), l)
                if fp16: l = tf.cast(l, tf.float16)
        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu, seed=seed_gen.next()) # 2x upsampling
        l = Conv2D('conv', l, num_category, 1, seed=seed_gen.next())
    if fp16:
        l = tf.cast(l, tf.float32)
    return l
Example 10
def Dropout(x, *args, **kwargs):
    """
    Same as `tf.layers.dropout`.
    However, for historical reasons, the first positional argument is
    interpreted as keep_prob rather than drop_prob.
    Explicitly use `rate=` keyword arguments to ensure things are consistent.
    """
    if 'is_training' in kwargs:
        kwargs['training'] = kwargs.pop('is_training')
    if len(args) > 0:
        if args[0] != 0.5:
            logger.warn(
                "The first positional argument to tensorpack.Dropout is the probability to keep, rather than to drop. "
                "This is different from the rate argument in tf.layers.Dropout due to historical reasons. "
                "To mimic tf.layers.Dropout, explicitly use keyword argument 'rate' instead"
            )
        rate = 1 - args[0]
    elif 'keep_prob' in kwargs:
        assert 'rate' not in kwargs, "Cannot set both keep_prob and rate!"
        rate = 1 - kwargs.pop('keep_prob')
    elif 'rate' in kwargs:
        rate = kwargs.pop('rate')
    else:
        rate = 0.5

    if kwargs.get('training', None) is None:
        kwargs['training'] = get_current_tower_context().is_training

    if get_tf_version_tuple() <= (1, 12):
        return tf.layers.dropout(x, rate=rate, **kwargs)
    else:
        return tf.nn.dropout(x, rate=rate if kwargs['training'] else 0.)
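Because of the keep_prob/rate ambiguity described in the docstring, the safest way to call this wrapper is with an explicit keyword. Illustrative calls, assuming they run under a tensorpack TowerContext as usual:

import tensorflow as tf

x = tf.zeros([32, 100])
y1 = Dropout(x, rate=0.2)        # drop 20% of activations; unambiguous
y2 = Dropout(x, keep_prob=0.8)   # legacy spelling, equivalent to rate=0.2
y3 = Dropout(x, 0.8)             # positional argument means keep_prob and triggers the warning above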
Example 11
def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    """
    Args:
        labels: n,
        label_logits: nxC
        fg_boxes: nfgx4, encoded
        fg_box_logits: nfgxCx4 or nfgx1x4 if class agnostic

    Returns:
        label_loss, box_loss
    """
    label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=label_logits)
    label_loss = tf.reduce_mean(label_loss, name='label_loss')

    fg_inds = tf.where(labels > 0)[:, 0]
    fg_labels = tf.gather(labels, fg_inds)
    num_fg = tf.size(fg_inds, out_type=tf.int64)
    empty_fg = tf.equal(num_fg, 0)
    if int(fg_box_logits.shape[1]) > 1:
        if get_tf_version_tuple() >= (1, 14):
            fg_labels = tf.expand_dims(fg_labels, axis=1)  # nfg x 1
            fg_box_logits = tf.gather(fg_box_logits, fg_labels, batch_dims=1)
        else:
            indices = tf.stack([tf.range(num_fg), fg_labels], axis=1)  # nfgx2
            fg_box_logits = tf.gather_nd(fg_box_logits, indices)
    fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])  # nfg x 4

    with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
        prediction = tf.argmax(label_logits, axis=1, name='label_prediction')
        correct = tf.cast(
            tf.equal(prediction, labels),
            tf.float32)  # boolean/integer gather is unavailable on GPU
        accuracy = tf.reduce_mean(correct, name='accuracy')
        fg_label_pred = tf.argmax(tf.gather(label_logits, fg_inds), axis=1)
        num_zero = tf.reduce_sum(tf.cast(tf.equal(fg_label_pred, 0), tf.int64),
                                 name='num_zero')
        false_negative = tf.where(empty_fg,
                                  0.,
                                  tf.cast(tf.truediv(num_zero, num_fg),
                                          tf.float32),
                                  name='false_negative')
        fg_accuracy = tf.where(empty_fg,
                               0.,
                               tf.reduce_mean(tf.gather(correct, fg_inds)),
                               name='fg_accuracy')

    box_loss = tf.reduce_sum(tf.abs(fg_boxes - fg_box_logits))
    box_loss = tf.truediv(box_loss,
                          tf.cast(tf.shape(labels)[0], tf.float32),
                          name='box_loss')

    add_moving_summary(label_loss, box_loss, accuracy, fg_accuracy,
                       false_negative,
                       tf.cast(num_fg, tf.float32, name='num_fg_label'))
    return [label_loss, box_loss]
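The version branch above picks, for each foreground box, the 4 regression values belonging to its own class out of an (n x C x 4) tensor; both paths give the same result. A toy-sized sketch of the two gathers:

import tensorflow as tf

logits = tf.reshape(tf.range(2 * 3 * 4, dtype=tf.float32), [2, 3, 4])  # n=2 boxes, C=3 classes
labels = tf.constant([2, 0], dtype=tf.int64)                           # class of each box

# TF >= 1.14: gather along the class axis with batch_dims
picked_new = tf.squeeze(tf.gather(logits, tf.expand_dims(labels, 1), batch_dims=1), axis=1)

# older TF: explicit (row, class) index pairs
idx = tf.stack([tf.range(2, dtype=tf.int64), labels], axis=1)
picked_old = tf.gather_nd(logits, idx)  # same 2 x 4 result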
Example 12
def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    """
    Args:
        mask_logits: #fg x #category x h x w
        fg_labels: #fg, in 1~#class, int64
        fg_target_masks: #fg x h x w, float32
    """
    mask_logits = tf.transpose(mask_logits, [0, 3, 1, 2])

    if get_tf_version_tuple() >= (1, 14):
        mask_logits = tf.gather(mask_logits,
                                tf.reshape(fg_labels - 1, [-1, 1]),
                                batch_dims=1)
        mask_logits = tf.squeeze(mask_logits, axis=1)
    else:
        indices = tf.stack(
            [tf.range(tf.size(fg_labels, out_type=tf.int64)), fg_labels - 1],
            axis=1)  # #fgx2
        mask_logits = tf.gather_nd(mask_logits, indices)  # #fg x h x w

    mask_probs = tf.sigmoid(mask_logits)

    # add some training visualizations to tensorboard
    with tf.name_scope('mask_viz'):
        # print('#' * 50)
        # print('shape of fg_target_masks:' , fg_target_masks.shape)
        # print('shape of mask_probs: ', mask_probs.shape)
        # print('#' * 50)
        viz = tf.concat([fg_target_masks, mask_probs], axis=1)
        viz = tf.expand_dims(viz, 3)
        viz = tf.cast(viz * 255, tf.uint8, name='viz')
        tf.summary.image('mask_truth|pred', viz, max_outputs=10)

    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=fg_target_masks,
                                                   logits=mask_logits)
    loss = tf.reduce_mean(loss, name='maskrcnn_loss')

    pred_label = mask_probs > 0.5
    truth_label = fg_target_masks > 0.5
    accuracy = tf.reduce_mean(tf.cast(tf.equal(pred_label, truth_label),
                                      tf.float32),
                              name='accuracy')
    pos_accuracy = tf.logical_and(tf.equal(pred_label, truth_label),
                                  tf.equal(truth_label, True))
    pos_accuracy = tf.reduce_mean(tf.cast(pos_accuracy, tf.float32),
                                  name='pos_accuracy')
    fg_pixel_ratio = tf.reduce_mean(tf.cast(truth_label, tf.float32),
                                    name='fg_pixel_ratio')

    add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
    return loss
Example 13
def monkeypatch_tf_layers():
    if get_tf_version_tuple() < (1, 4):
        if not hasattr(tf.layers, 'Dense'):
            from tensorflow.python.layers.core import Dense
            tf.layers.Dense = Dense

            from tensorflow.python.layers.normalization import BatchNormalization
            tf.layers.BatchNormalization = BatchNormalization

            from tensorflow.python.layers.convolutional import Conv2DTranspose, Conv2D
            tf.layers.Conv2DTranspose = Conv2DTranspose
            tf.layers.Conv2D = Conv2D

            from tensorflow.python.layers.pooling import MaxPooling2D, AveragePooling2D
            tf.layers.MaxPooling2D = MaxPooling2D
            tf.layers.AveragePooling2D = AveragePooling2D
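Usage is just to run the patch once, early, so that code written against the TF 1.4+ layer classes keeps importing on older releases (illustrative):

import tensorflow as tf

monkeypatch_tf_layers()               # install the aliases once, at startup
fc_layer = tf.layers.Dense(units=10)  # now resolves even on TF < 1.4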
Example 14
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Args:
        feature (NCHW):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, "GN"], norm
    l = feature
    with argscope(
            Conv2D,
            data_format="channels_first",
            kernel_initializer=tf.variance_scaling_initializer(
                scale=2.0,
                mode="fan_out",
                distribution="untruncated_normal" if get_tf_version_tuple() >=
                (1, 12) else "normal",
            ),
    ):
        for k in range(num_convs):
            l = Conv2D("conv{}".format(k),
                       l,
                       cfg.FPN.FRCNN_CONV_HEAD_DIM,
                       3,
                       activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm("gn{}".format(k), l)
        l = FullyConnected(
            "fc",
            l,
            cfg.FPN.FRCNN_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu,
        )
    return l
Example 15
def boxclass_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Args:
        feature (NCHW):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(
            Conv2D,
            data_format='channels_first',
            kernel_initializer=tf.variance_scaling_initializer(
                scale=2.0,
                mode='fan_out',
                distribution='untruncated_normal' if get_tf_version_tuple() >=
                (1, 12) else 'normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k),
                       l,
                       cfg.FPN.BOXCLASS_CONV_HEAD_DIM,
                       3,
                       activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = FullyConnected(
            'fc',
            l,
            cfg.FPN.BOXCLASS_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return l
Example 16
 def test(self):
     if get_tf_version_tuple() < (1, 4):
         return True  # requires leaky_relu
     self.assertSurvive(self.script, args=None)
Example 17
def mpusim_conv2d(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        activations_datatype_size_byte=1,
        weights_datatype_size_byte=1,
        results_datatype_size_byte=4,
        systolic_array_height=256,
        systolic_array_width=256,
        activation_fifo_depth=8,
        accumulator_array_height=4096,
        log_file_output_dir='.',
        model_name='unnamed'):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    # group conv implementation
    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[mpusim_conv2d] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    filter_shape = kernel_shape + [in_channel // split, out_channel]  # integer division: shape entries must be ints
    stride = shape4d(strides, data_format=data_format)

    kwargs = dict(data_format=data_format)
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

    W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

    if use_bias:
        b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

    if split == 1:
        conv = mpu_sim_conv2d_lib.mpu_sim_conv2d(inputs,
                                                    W,
                                                    activations_datatype_size_byte,
                                                    weights_datatype_size_byte,
                                                    results_datatype_size_byte,
                                                    systolic_array_height,
                                                    systolic_array_width,
                                                    activation_fifo_depth,
                                                    accumulator_array_height,
                                                    log_file_output_dir,
                                                    model_name,
                                                    stride,
                                                    padding.upper(),
                                                    **kwargs)
    else:
        
        inputs = tf.split(inputs, split, channel_axis)
        kernels = tf.split(W, split, 3)
        outputs = [mpu_sim_conv2d_lib.mpu_sim_conv2d(input_block,
                                                        kernel_block,
                                                        activations_datatype_size_byte,
                                                        weights_datatype_size_byte,
                                                        results_datatype_size_byte,
                                                        systolic_array_height,
                                                        systolic_array_width,
                                                        activation_fifo_depth,
                                                        accumulator_array_height,
                                                        log_file_output_dir,
                                                        model_name,
                                                        stride,
                                                        padding.upper(),
                                                        **kwargs)
                    for input_block, kernel_block in zip(inputs, kernels)]
        conv = tf.concat(outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b=b
    return ret
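The split branch above is an ordinary grouped convolution built from tf.split/concat; the only unusual part is routing each group through the mpusim kernel. The same structure with plain tf.nn.conv2d and toy NHWC shapes (illustrative):

import tensorflow as tf

x = tf.zeros([1, 8, 8, 4])                      # NHWC input, 4 channels
W = tf.zeros([3, 3, 4 // 2, 6])                 # per-group in_channels, total out_channels
xs = tf.split(x, 2, axis=3)                     # 2 groups of input channels
ws = tf.split(W, 2, axis=3)                     # 2 groups of output channels
ys = [tf.nn.conv2d(i, k, [1, 1, 1, 1], 'SAME') for i, k in zip(xs, ws)]
y = tf.concat(ys, axis=3)                       # 1 x 8 x 8 x 6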
Example 18
            self._eval()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='load a model for evaluation or training. Can overwrite BACKBONE.WEIGHTS')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation on COCO. "
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
    parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py",
                        nargs='+')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.")

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    if args.visualize or args.evaluate or args.predict:
        assert args.load
        finalize_configs(is_training=False)

        if args.predict or args.visualize:
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
Example 19
def MaskedConv2D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        masking=False):
    """
    A wrapper around `tf.layers.Conv2D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if (not masking) and (split == 1) and (dilation_rate == [1, 1]):
        # tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797)
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                _reuse=tf.get_variable_scope().reuse)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        if masking:
            assert split == 1, "Pruning group conv is not supported yet"

        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]  # integer division: shape entries must be ints
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        if split == 1:
            if masking:
                W = pruning.apply_mask(W)
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            conv = None
            if get_tf_version_tuple() >= (1, 13):
                try:
                    conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
                except ValueError:
                    log_once("CUDNN group convolution support is only available with "
                             "https://github.com/tensorflow/tensorflow/pull/25818 . "
                             "Will fall back to a loop-based slow implementation instead!", 'warn')
            if conv is None:
                inputs = tf.split(inputs, split, channel_axis)
                kernels = tf.split(W, split, 3)
                outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                           for i, k in zip(inputs, kernels)]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
Example 20
    tpviz.interactive_imshow(viz)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='load a model for evaluation or training. Can overwrite BACKBONE.WEIGHTS')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation. "
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
    parser.add_argument('--config', help="A list of KEY=VALUE to overwrite those defined in config.py",
                        nargs='+')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn("TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky.")

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    DetectionDataset()  # initialize the config with information from our dataset

    if args.visualize or args.evaluate or args.predict:
        assert tf.test.is_gpu_available()
        assert args.load
        finalize_configs(is_training=False)
Example 21
def Conv3D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1):
    """
    A wrapper around `tf.layers.Conv3D`.
    Some differences to maintain backward-compatibility:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.
    Variable Names:
    * ``W``: weights
    * ``b``: bias
    """
    if split == 1:
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv3D(filters,
                                     kernel_size,
                                     strides=strides,
                                     padding=padding,
                                     data_format='channels_last',
                                     dilation_rate=dilation_rate,
                                     activation=activation,
                                     use_bias=use_bias,
                                     kernel_initializer=kernel_initializer,
                                     bias_initializer=bias_initializer,
                                     kernel_regularizer=kernel_regularizer,
                                     bias_regularizer=bias_regularizer,
                                     activity_regularizer=activity_regularizer)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        # group conv implementation
        data_format = get_data_format3d(data_format, tfmode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 4 if data_format == 'NDHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv3D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv now!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == (1, 1, 1) or get_tf_version_tuple() >= (
            1, 5), 'TF>=1.5 required for group dilated conv'

        kernel_shape = shape3d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]  # integer division: shape entries must be ints
        stride = shape5d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            # conv3d expects a length-5 dilations spec
            kwargs['dilations'] = shape5d(dilation_rate,
                                          data_format=data_format)

        W = tf.get_variable('W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel],
                                initializer=bias_initializer)

        inputs = tf.split(inputs, split, channel_axis)
        # tf.split(value,num_or_size_splits,axis=0, num=None,name='split')
        kernels = tf.split(W, split, 4)

        outputs = [
            tf.nn.conv3d(i, k, stride, padding.upper(), **kwargs)
            for i, k in zip(inputs, kernels)
        ]
        conv = tf.concat(outputs, channel_axis)
        if activation is None:
            activation = tf.identity
        ret = activation(tf.nn.bias_add(conv, b, data_format=data_format)
                         if use_bias else conv,
                         name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
Example 22
def generate_fpn_proposals(multilevel_anchor_boxes, multilevel_box_logits,
                           multilevel_label_logits, orig_image_dims,
                           batch_size):
    """
    Generate RoIs from the box logits and keep the K proposals with the
    highest label scores.

    Args:
        multilevel_box_logits:      #lvl [ BS x (NA * 4) x H_feature x W_feature ] boxes
        multilevel_label_logits:    #lvl [ BS x H_feature x W_feature x NA ] tensors
        orig_image_dims: original (pre-padding) image dimensions (h, w, c)   BS x 3
    Returns:
        boxes: K x 5 float
        scores:  1-D, K (logits)
    """
    prefix = "model_fpn.generate_fpn_proposals"
    bug_prefix = "GEN_PROPOSALS_BUG fpn"
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK * batch_size if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        for lvl in range(num_lvl):
            with tf.name_scope(f'Lvl{lvl}'):
                im_info = tf.cast(orig_images_hw, tf.float32)

                scores = multilevel_label_logits[
                    lvl]  # BS x H_feature x W_feature x NA
                bbox_deltas = tf.transpose(
                    multilevel_box_logits[lvl],
                    [0, 2, 3, 1])  # BS x H_feature x W_feature x (NA * 4)

                single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                single_level_anchor_boxes = tf.reshape(
                    single_level_anchor_boxes, (-1, 4))

                # This is a custom tensorflow op that translates the bbox deltas into bounding box coordinates
                # and then runs NMS. See CODEBASE.md for more info
                #
                # roi: (# boxes for a single level) x 5, the 5 columns arranged as: batch_index, x_1, y_1, x_2, y_2
                # rois_probs: 1-D, # boxes for a single level
                # name change in tf 1.15
                generate_bounding_box_proposals = tf.generate_bounding_box_proposals_v2 if get_tf_version_tuple()==(1,15) \
                                                    else tf.generate_bounding_box_proposals
                rois, rois_probs = generate_bounding_box_proposals(
                    scores,
                    bbox_deltas,
                    im_info,
                    single_level_anchor_boxes,
                    spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                    pre_nms_topn=fpn_nms_topk,
                    post_nms_topn=fpn_nms_topk,
                    nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                    min_size=cfg.RPN.MIN_SIZE)
                # rois_probs = print_runtime_shape(f'rois_probs, lvl {lvl}', rois_probs, prefix=bug_prefix)
                all_boxes.append(rois)
                all_scores.append(rois_probs)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # Num_all_rois x 5
        proposal_boxes = tf.reshape(proposal_boxes,
                                    [-1, 5])  # Num_all_rois x 5

        proposal_scores = tf.concat(all_scores, axis=0)  # 1-D Num_all_rois
        proposal_scores = tf.reshape(proposal_scores, [-1])  # 1-D Num_all_rois

        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=proposal_topk,
                                                    sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)  # K x 5

    else:
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
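After per-level NMS, all levels' proposals are concatenated and only the K best scores survive, with K clipped so top_k never asks for more elements than exist. The selection step in isolation, with toy values:

import tensorflow as tf

scores = tf.constant([0.9, 0.2, 0.7, 0.4])
boxes = tf.reshape(tf.range(4 * 5, dtype=tf.float32), [4, 5])  # batch_index, x1, y1, x2, y2
k = tf.minimum(tf.size(scores), 3)
top_scores, top_idx = tf.nn.top_k(scores, k=k, sorted=False)
top_boxes = tf.gather(boxes, top_idx)                          # 3 x 5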
Example 23
    parser.add_argument('--visualize',
                        action='store_true',
                        help='visualize intermediate results')
    parser.add_argument(
        '--evaluate',
        help="Run evaluation. "
        "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict',
                        help="Run prediction on a given image. "
                        "This argument is the path to the input image file")
    parser.add_argument(
        '--config',
        help="A list of KEY=VALUE to overwrite those defined in config.py",
        nargs='+')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn(
            "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky."
        )

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    DetectionDataset(
    )  # initialize the config with information from our dataset

    if args.visualize or args.evaluate or args.predict:
        if not tf.test.is_gpu_available():
Example 24
def generate_fpn_proposals_topk_per_image(multilevel_anchor_boxes,
                                          multilevel_box_logits,
                                          multilevel_label_logits,
                                          orig_image_dims, batch_size):
    """
    Args:
        multilevel_box_logits:      #lvl [ BS x (NAx4) x H x W ] boxes
        multilevel_label_logits:    #lvl [ BS x H x W x A ] tensors
        orig_image_dims: original (pre-padding) image dimensions (h, w, c)   BS x 3
    Returns:
        boxes: K x 5 float
        scores:  (#lvl x BS x K) vector       (logits)
    """

    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_label_logits) == num_lvl
    orig_images_hw = orig_image_dims[:, :2]

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        boxes_list = []
        scores_list = []

        bs = batch_size if training else 1

        for i in range(bs):
            all_boxes = []
            all_scores = []
            for lvl in range(num_lvl):
                with tf.name_scope(f'Lvl{lvl}'):
                    im_info = tf.cast(orig_images_hw[i:(i + 1)], tf.float32)
                    # h, w

                    scores = multilevel_label_logits[lvl][i:(i + 1)]
                    bbox_deltas = tf.transpose(
                        multilevel_box_logits[lvl][i:(i + 1)], [0, 2, 3, 1])

                    single_level_anchor_boxes = multilevel_anchor_boxes[lvl]
                    single_level_anchor_boxes = tf.reshape(
                        single_level_anchor_boxes, (-1, 4))

                    # https://caffe2.ai/docs/operators-catalogue.html#generateproposals
                    generate_bounding_box_proposals = tf.generate_bounding_box_proposals_v2 if get_tf_version_tuple()==(1,15) \
                                                    else tf.generate_bounding_box_proposals
                    rois, rois_probs = generate_bounding_box_proposals(
                        scores,
                        bbox_deltas,
                        im_info,
                        single_level_anchor_boxes,
                        spatial_scale=1.0 / cfg.FPN.ANCHOR_STRIDES[lvl],
                        pre_nms_topn=fpn_nms_topk,
                        post_nms_topn=fpn_nms_topk,
                        nms_threshold=cfg.RPN.PROPOSAL_NMS_THRESH,
                        min_size=cfg.RPN.MIN_SIZE)

                    # rois_probs = print_runtime_shape(f'rois_probs, lvl {lvl}', rois_probs, prefix=bug_prefix)
                    all_boxes.append(
                        tf.concat((i + rois[:, :1], rois[:, 1:]), axis=1))
                    all_scores.append(rois_probs)

            proposal_boxes = tf.concat(all_boxes,
                                       axis=0)  # (#lvl x BS) x K x 5
            proposal_boxes = tf.reshape(proposal_boxes,
                                        [-1, 5])  # (#lvl x BS x K) x 5

            proposal_scores = tf.concat(all_scores, axis=0)  # (#lvl x BS) x K
            proposal_scores = tf.reshape(proposal_scores,
                                         [-1])  # (#lvl x BS x 5) vector

            topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
            topk_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=topk,
                                                    sorted=False)

            boxes_list.append(tf.gather(proposal_boxes, topk_indices))
            scores_list.append(tf.gather(proposal_scores, topk_indices))

        #
        #        boxes_list = []
        #        scores_list = []
        #
        #        for i in range(batch_size):
        #            batch_ind = tf.squeeze(tf.where(tf.equal(proposal_boxes[:, 0], i)), axis=1)
        #            image_scores = tf.gather(proposal_scores, batch_ind)
        #            image_boxes = tf.gather(proposal_boxes, batch_ind)
        #
        #            image_proposal_topk = tf.minimum(tf.size(image_scores), fpn_nms_topk//batch_size)
        #            image_proposal_scores, image_topk_indices = tf.nn.top_k(image_scores, k=image_proposal_topk, sorted=False)
        #            boxes_list.append(tf.gather(image_boxes, image_topk_indices))
        #            scores_list.append(image_proposal_scores)

        boxes = tf.concat(boxes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)

        #        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
    #        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
    #        proposal_boxes = tf.gather(proposal_boxes, topk_indices)

    else:
        raise RuntimeError(
            "Only level-wise predictions are supported with batches")

    return tf.stop_gradient(boxes, name='boxes'), \
        tf.stop_gradient(scores, name='scores')
Example 25
 def test(self):
     if get_tf_version_tuple() < (1, 4):
         return True     # requires leaky_relu
     self.assertSurvive(self.script, args=None)
def BatchNorm3d(inputs,
                axis=None,
                training=None,
                momentum=0.9,
                epsilon=1e-5,
                center=True,
                scale=True,
                beta_initializer=tf.zeros_initializer(),
                gamma_initializer=tf.ones_initializer(),
                virtual_batch_size=None,
                data_format='channels_last',
                internal_update=False,
                sync_statistics=None):
    """
    Almost equivalent to `tf.layers.batch_normalization`, but different (and more powerful)
    in the following:
    1. Accepts an alternative `data_format` option when `axis` is None. For 2D input, this argument will be ignored.
    2. Default value for `momentum` and `epsilon` is different.
    3. Default value for `training` is automatically obtained from tensorpack's `TowerContext`, but can be overwritten.
    4. Support the `internal_update` option, which enables the use of BatchNorm layer inside conditionals.
    5. Support the `sync_statistics` option, which is very useful in small-batch models.
    Args:
        internal_update (bool): if False, add EMA update ops to
          `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer by control dependencies.
          They are very similar in speed, but `internal_update=True` can be used
          when you have conditionals in your model, or when you have multiple networks to train.
          Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699
        sync_statistics: either None or "nccl". By default (None), it uses statistics of the input tensor to normalize.
          When set to "nccl", this layer must be used under tensorpack multi-gpu trainers,
          and it then uses per-machine (multiple GPU) statistics to normalize.
          Note that this implementation averages the per-tower E[x] and E[x^2] among towers to compute
          global mean&variance. The result is the global mean&variance only if each tower has the same batch size.
          This option has no effect when not training.
          This option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240.
          Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/18222
    Variable Names:
    * ``beta``: the bias term. Will be zero-inited by default.
    * ``gamma``: the scale term. Will be one-inited by default.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.
    Note:
        Combinations of ``training`` and ``ctx.is_training``:
        * ``training == ctx.is_training``: standard BN, EMA are maintained during training
          and used during inference. This is the default.
        * ``training and not ctx.is_training``: still use batch statistics in inference.
        * ``not training and ctx.is_training``: use EMA to normalize in
          training. This is useful when you load a pre-trained BN and
          don't want to fine tune the EMA. EMA will not be updated in
          this case.
    """
    # parse shapes
    data_format = get_data_format(data_format, tfmode=False)
    shape = inputs.get_shape().as_list()
    ndims = len(shape)
    # in 3d conv, we have 5d dim [batch, c, d, h, w]
    # assert ndims in [2, 4], ndims
    if sync_statistics is not None:
        sync_statistics = sync_statistics.lower()
    assert sync_statistics in [None, 'nccl', 'horovod'], sync_statistics

    if axis is None:
        if ndims == 2:
            data_format = 'NHWC'
            axis = 1
        elif ndims == 5:
            axis = 1 if data_format == 'NCHW' else 4
        else:
            axis = 1 if data_format == 'NCHW' else 3
    else:
        data_format = 'NCHW' if axis == 1 else 'NHWC'
    num_chan = shape[axis]

    # parse training/ctx
    ctx = get_current_tower_context()
    if training is None:
        training = ctx.is_training
    training = bool(training)
    TF_version = get_tf_version_tuple()  # keep it as a tuple so versions like (1, 10) compare correctly
    if not training and ctx.is_training:
        assert TF_version >= (1, 4), \
            "Fine tuning a BatchNorm model with fixed statistics is only " \
            "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
        if ctx.is_main_training_tower:  # only warn in first tower
            logger.warn(
                "[BatchNorm] Using moving_mean/moving_variance in training.")
        # Using moving_mean/moving_variance in training, which means we
        # loaded a pre-trained BN and only fine-tuning the affine part.

    if sync_statistics is None or not (training and ctx.is_training):
        coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS])
        with rename_get_variable({
                'moving_mean': 'mean/EMA',
                'moving_variance': 'variance/EMA'
        }):
            tf_args = dict(axis=axis,
                           momentum=momentum,
                           epsilon=epsilon,
                           center=center,
                           scale=scale,
                           beta_initializer=beta_initializer,
                           gamma_initializer=gamma_initializer,
                           fused=True,
                           _reuse=tf.get_variable_scope().reuse)
            if TF_version >= (1, 5):
                tf_args['virtual_batch_size'] = virtual_batch_size
            else:
                assert virtual_batch_size is None, "Feature not supported in this version of TF!"
            layer = tf.layers.BatchNormalization(**tf_args)
            xn = layer.apply(inputs,
                             training=training,
                             scope=tf.get_variable_scope())

        # maintain EMA only on one GPU is OK, even in replicated mode.
        # because during training, EMA isn't used
        if ctx.is_main_training_tower:
            for v in layer.non_trainable_variables:
                add_model_variable(v)
        if not ctx.is_main_training_tower or internal_update:
            restore_collection(coll_bk)

        if training and internal_update:
            assert layer.updates
            with tf.control_dependencies(layer.updates):
                ret = tf.identity(xn, name='output')
        else:
            ret = tf.identity(xn, name='output')

        vh = ret.variables = VariableHolder(
            moving_mean=layer.moving_mean,
            mean=layer.moving_mean,  # for backward-compatibility
            moving_variance=layer.moving_variance,
            variance=layer.moving_variance)  # for backward-compatibility
        if scale:
            vh.gamma = layer.gamma
        if center:
            vh.beta = layer.beta
    else:
        red_axis = [0] if ndims == 2 else (
            [0, 2, 3] if axis == 1 else [0, 1, 2])
        if ndims == 5:
            red_axis = [0, 2, 3, 4] if axis == 1 else [0, 1, 2, 3]
        new_shape = None  # no reshape needed unless the channel axis is not the last one (NCHW/NCDHW)
        if ndims == 4 and axis == 1:
            new_shape = [1, num_chan, 1, 1]
        if ndims == 5 and axis == 1:
            new_shape = [1, num_chan, 1, 1, 1]

        batch_mean = tf.reduce_mean(inputs, axis=red_axis)
        batch_mean_square = tf.reduce_mean(tf.square(inputs), axis=red_axis)
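        # Each GPU computes E[x] and E[x^2] over its own batch; after these two moments are
        # averaged across devices, the global variance is recovered below as
        # Var[x] = E[x^2] - (E[x])^2, so only two all-reduces are needed.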

        if sync_statistics == 'nccl':
            if six.PY3 and TF_version <= (1, 8) and ctx.is_main_training_tower:
                logger.warn(
                    "A TensorFlow bug will cause cross-GPU BatchNorm to fail. "
                    "Apply this patch: https://github.com/tensorflow/tensorflow/pull/20360"
                )

            from tensorflow.contrib.nccl.ops import gen_nccl_ops
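            # strip the "towerN/" prefix so every tower produces the same shared_name
            # and therefore joins the same NCCL all-reduce group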
            shared_name = re.sub('tower[0-9]+/', '',
                                 tf.get_variable_scope().name)
            num_dev = ctx.total
            batch_mean = gen_nccl_ops.nccl_all_reduce(
                input=batch_mean,
                reduction='sum',
                num_devices=num_dev,
                shared_name=shared_name + '_NCCL_mean') * (1.0 / num_dev)
            batch_mean_square = gen_nccl_ops.nccl_all_reduce(
                input=batch_mean_square,
                reduction='sum',
                num_devices=num_dev,
                shared_name=shared_name + '_NCCL_mean_square') * (1.0 /
                                                                  num_dev)
        elif sync_statistics == 'horovod':
            # Require https://github.com/uber/horovod/pull/331
            # Proof-of-concept, not ready yet.
            import horovod.tensorflow as hvd
            batch_mean = hvd.allreduce(batch_mean, average=True)
            batch_mean_square = hvd.allreduce(batch_mean_square, average=True)
        batch_var = batch_mean_square - tf.square(batch_mean)
        batch_mean_vec = batch_mean
        batch_var_vec = batch_var

        beta, gamma, moving_mean, moving_var = get_bn_variables(
            num_chan, scale, center, beta_initializer, gamma_initializer)
        if new_shape is not None:
            batch_mean = tf.reshape(batch_mean, new_shape)
            batch_var = tf.reshape(batch_var, new_shape)
            # Using fused_batch_norm(is_training=False) is actually slightly faster,
            # but hopefully this call will be JITed in the future.
            xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var,
                                           tf.reshape(beta, new_shape),
                                           tf.reshape(gamma, new_shape),
                                           epsilon)
        else:
            xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta,
                                           gamma, epsilon)

        if ctx.is_main_training_tower:
            ret = update_bn_ema(xn, batch_mean_vec, batch_var_vec, moving_mean,
                                moving_var, momentum, internal_update)
        else:
            ret = tf.identity(xn, name='output')

        vh = ret.variables = VariableHolder(
            moving_mean=moving_mean,
            mean=moving_mean,  # for backward-compatibility
            moving_variance=moving_var,
            variance=moving_var)  # for backward-compatibility
        if scale:
            vh.gamma = gamma
        if center:
            vh.beta = beta
    return ret
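# A minimal NumPy sketch (not part of the layer above; the shard data is made up) showing why
# all-reducing the per-GPU moments E[x] and E[x^2] is enough to recover the global batch
# variance used by the sync_statistics path:
import numpy as np

shards = [np.random.randn(8, 16), np.random.randn(8, 16)]            # per-GPU batches, NC layout
mean = np.mean([s.mean(axis=0) for s in shards], axis=0)             # "all-reduced" E[x]
mean_sq = np.mean([(s ** 2).mean(axis=0) for s in shards], axis=0)   # "all-reduced" E[x^2]
var = mean_sq - mean ** 2                                            # Var[x] = E[x^2] - (E[x])^2
full = np.concatenate(shards, axis=0)
assert np.allclose(var, full.var(axis=0))                            # matches the global batch variance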
Esempio n. 27
0
def main(args):
    # "spawn/forkserver" is safer than the default "fork" method and
    # produce more deterministic behavior & memory saving
    # However its limitation is you cannot pass a lambda function to subprocesses.
    import multiprocessing as mp
    mp.set_start_method('spawn')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn(
            "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky."
        )

    # Setup logging ...
    is_horovod = cfg.TRAINER == 'horovod'
    if is_horovod:
        hvd.init()
    if not is_horovod or hvd.rank() == 0:
        logger.set_logger_dir(args.logdir, 'd')
    logger.info("Environment Information:\n" + collect_env_info())

    finalize_configs(is_training=True)

    # Create model
    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    # Compute the training schedule from the number of GPUs ...
    stepnum = cfg.TRAIN.STEPS_PER_EPOCH
    # warmup is step based, lr is epoch based
    init_lr = cfg.TRAIN.WARMUP_INIT_LR * min(8. / cfg.TRAIN.NUM_GPUS, 1.)
    warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)]
    warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum
    lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)]

    factor = 8. / cfg.TRAIN.NUM_GPUS
    for idx, steps in enumerate(cfg.TRAIN.LR_SCHEDULE[:-1]):
        mult = 0.1**(idx + 1)
        lr_schedule.append(
            (steps * factor // stepnum, cfg.TRAIN.BASE_LR * mult))
    logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule))
    logger.info("LR Schedule (epochs, value): " + str(lr_schedule))
    train_dataflow = get_train_dataflow()
    # This is what's commonly referred to as "epochs"
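    # Assuming one image per GPU per step (cf. ThroughputTracker below), the run processes
    # (LR_SCHEDULE[-1] * 8 / NUM_GPUS) steps * NUM_GPUS images = LR_SCHEDULE[-1] * 8 images,
    # hence the fixed factor of 8 regardless of the GPU count.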
    total_passes = cfg.TRAIN.LR_SCHEDULE[-1] * 8 / train_dataflow.size()
    logger.info(
        "Total passes of the training set is: {:.5g}".format(total_passes))

    # Create callbacks ...
    callbacks = [
        PeriodicCallback(ModelSaver(max_to_keep=10,
                                    keep_checkpoint_every_n_hours=1),
                         every_k_epochs=cfg.TRAIN.CHECKPOINT_PERIOD),
        # linear warmup
        ScheduledHyperParamSetter('learning_rate',
                                  warmup_schedule,
                                  interp='linear',
                                  step_based=True),
        ScheduledHyperParamSetter('learning_rate', lr_schedule),
        GPUMemoryTracker(),
        HostMemoryTracker(),
        ThroughputTracker(samples_per_step=cfg.TRAIN.NUM_GPUS),
        EstimatedTimeLeft(median=True),
        SessionRunTimeout(60000)  # 1 minute timeout
        #AMLCallback()
        #GPUUtilizationTracker()
    ]
    if cfg.TRAIN.EVAL_PERIOD > 0:
        callbacks.extend([
            EvalCallback(dataset, *MODEL.get_inference_tensor_names(),
                         args.logdir) for dataset in cfg.DATA.VAL
        ])

    if is_horovod and hvd.rank() > 0:
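        # Only rank 0 loads weights; the other ranks receive their initial values
        # through the trainer's broadcast of variables from rank 0.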
        session_init = None
    else:
        if args.load:
            # ignore mismatched values, so you can `--load` a model for fine-tuning
            session_init = SmartInit(args.load, ignore_mismatch=True)
        else:
            session_init = SmartInit(cfg.BACKBONE.WEIGHTS)

    traincfg = TrainConfig(model=MODEL,
                           data=QueueInput(train_dataflow),
                           callbacks=callbacks,
                           monitors=[AMLMonitor()],
                           steps_per_epoch=stepnum,
                           max_epoch=cfg.TRAIN.LR_SCHEDULE[-1] * factor //
                           stepnum,
                           session_init=session_init,
                           starting_epoch=cfg.TRAIN.STARTING_EPOCH)

    if is_horovod:
        trainer = HorovodTrainer(average=False)
    else:
        # nccl mode appears faster than cpu mode
        trainer = SyncMultiGPUTrainerReplicated(cfg.TRAIN.NUM_GPUS,
                                                average=False,
                                                mode='nccl')
    launch_train_with_config(traincfg, trainer)
Esempio n. 28
0
def Conv(inputs,
         filters,
         kernel_size,
         strides=(1, 1),
         padding='same',
         data_format='channels_last',
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
         kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         split=1,
         norm=False):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    Variable Names:
    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(
                2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if True:
        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (
            1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]
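        # filter_shape is [kh, kw, in_channel // split, out_channel]: each of the `split`
        # groups convolves its own slice of in_channel // split input channels.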
        stride = shape4d(strides, data_format=data_format)

        kwargs = {"data_format": data_format}
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate,
                                          data_format=data_format)

        # match the input dtype (e.g. tf.float16), since the default variable dtype is tf.float32
        inputs_dtype = inputs.dtype
        W = tf.get_variable('parseweigth',
                            filter_shape,
                            dtype=inputs_dtype,
                            initializer=kernel_initializer)
        if norm:
            use_bias = False
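            # Factor the input-channel axis of W into [4, in_channel // 4], apply a softmax
            # over the size-4 axis (those 4 weights then sum to 1 at every position), and
            # restore the original filter shape. Bias is disabled in this mode.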
            W = tf.reshape(W, kernel_shape + [4, in_channel // 4, out_channel])
            W = tf.nn.softmax(W, 2)
            W = tf.reshape(W, filter_shape)
        #dynamics = tf.reduce_mean(inputs, 0)
        #dynamics = tf.transpose(dynamics, [1,2,0])
        #dynamics = tf.image.resize_images(dynamics, kernel_shape)
        #dynamics = tf.expand_dims(dynamics, -1)
        #W = W  +  0.001 * dynamics #tf.random_normal(shape = tf.shape(W), mean = 0.0, stddev = 0.012, dtype = tf.float32)

        #W = W *tf.random_uniform(shape=W.get_shape().as_list(), minval=0., maxval=2.)

        if use_bias:
            b = tf.get_variable('parsebias', [out_channel],
                                dtype=inputs_dtype,
                                initializer=bias_initializer)

        if split == 1:
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            try:
                conv = tf.nn.conv2d(inputs, W, stride, padding.upper(),
                                    **kwargs)
            except ValueError:
                log_once(
                    "CUDNN group convolution support is only available with "
                    "https://github.com/tensorflow/tensorflow/pull/25818 . "
                    "Will fall back to a loop-based slow implementation instead!",
                    'warn')
                # Loop-based fallback so `conv` is always defined: run one conv per
                # group on its slice of channels and concatenate the outputs.
                inputs = tf.split(inputs, split, channel_axis)
                kernels = tf.split(W, split, 3)
                outputs = [
                    tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                    for i, k in zip(inputs, kernels)
                ]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b,
                             data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
Esempio n. 29
0
    parser.add_argument('--visualize',
                        action='store_true',
                        help='visualize intermediate results')
    parser.add_argument(
        '--evaluate',
        help="Run evaluation on COCO. "
        "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict',
                        help="Run prediction on a given image. "
                        "This argument is the path to the input image file")
    parser.add_argument(
        '--config',
        help="A list of KEY=VALUE to overwrite those defined in config.py",
        nargs='+')

    if get_tf_version_tuple() < (1, 6):
        # https://github.com/tensorflow/tensorflow/issues/14657
        logger.warn(
            "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky."
        )
    if get_tf_version_tuple() == (1, 11):
        # https://github.com/tensorflow/tensorflow/issues/22750
        logger.warn("TF=1.11 has a bug which leads to crash in inference.")

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    if args.visualize or args.evaluate or args.predict:
Esempio n. 30
0
    def test(self):
        return True  # https://github.com/tensorflow/tensorflow/issues/24517
        if get_tf_version_tuple() < (1, 4):
            return True  # requires leaky_relu
        self.assertSurvive(self.script, args=None)
Esempio n. 31
0
def fastrcnn_predictions(boxes, scores):
    """
    Generate final results from predictions of all proposals.

    Args:
        boxes: nx#classx4 floatbox in float32
        scores: nx#class

    Returns:
        boxes: Kx4
        scores: K
        labels: K
    """
    assert boxes.shape[1] == cfg.DATA.NUM_CLASS
    assert scores.shape[1] == cfg.DATA.NUM_CLASS
    boxes = tf.transpose(boxes, [1, 0, 2])[1:, :, :]  # #catxnx4
    scores = tf.transpose(scores[:, 1:], [1, 0])  # #catxn

    def f(X):
        """
        prob: n probabilities
        box: nx4 boxes

        Returns: n boolean, the selection
        """
        prob, box = X
        output_shape = tf.shape(prob, out_type=tf.int64)
        # filter by score threshold
        ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
        prob = tf.gather(prob, ids)
        box = tf.gather(box, ids)
        # NMS within each class
        selection = tf.image.non_max_suppression(box, prob,
                                                 cfg.TEST.RESULTS_PER_IM,
                                                 cfg.TEST.FRCNN_NMS_THRESH)
        selection = tf.gather(ids, selection)

        if get_tf_version_tuple() >= (1, 13):
            sorted_selection = tf.sort(selection, direction='ASCENDING')
            mask = tf.sparse.SparseTensor(indices=tf.expand_dims(
                sorted_selection, 1),
                                          values=tf.ones_like(sorted_selection,
                                                              dtype=tf.bool),
                                          dense_shape=output_shape)
            mask = tf.sparse.to_dense(mask, default_value=False)
        else:
            # this function is deprecated by TF
            sorted_selection = -tf.nn.top_k(-selection,
                                            k=tf.size(selection))[0]
            mask = tf.sparse_to_dense(sparse_indices=sorted_selection,
                                      output_shape=output_shape,
                                      sparse_values=True,
                                      default_value=False)
        return mask

    # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
    buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]
    masks = tf.map_fn(f, (scores, boxes),
                      dtype=tf.bool,
                      parallel_iterations=1 if buggy_tf else 10)  # #cat x N
    selected_indices = tf.where(
        masks)  # #selection x 2, each is (cat_id, box_id)
    scores = tf.boolean_mask(scores, masks)

    # filter again by sorting scores
    topk_scores, topk_indices = tf.nn.top_k(scores,
                                            tf.minimum(cfg.TEST.RESULTS_PER_IM,
                                                       tf.size(scores)),
                                            sorted=False)
    filtered_selection = tf.gather(selected_indices, topk_indices)
    cat_ids, box_ids = tf.unstack(filtered_selection, axis=1)

    final_scores = tf.identity(topk_scores, name='scores')
    final_labels = tf.add(cat_ids, 1, name='labels')
    final_ids = tf.stack([cat_ids, box_ids], axis=1, name='all_ids')
    final_boxes = tf.gather_nd(boxes, final_ids, name='boxes')
    return final_boxes, final_scores, final_labels
Esempio n. 32
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: dump-model-params.py

import argparse
import numpy as np
import os
import six
import tensorflow as tf

from tensorpack import logger
from tensorpack.tfutils import varmanip
from tensorpack.tfutils.common import get_op_tensor_name, get_tf_version_tuple

TF_version = get_tf_version_tuple()


def _import_external_ops(message):
    if "horovod" in message.lower():
        logger.info("Importing horovod ...")
        import horovod.tensorflow  # noqa
        return
    if "MaxBytesInUse" in message:
        logger.info("Importing memory_stats ...")
        from tensorflow.contrib.memory_stats import MaxBytesInUse  # noqa
        return
    if 'Nccl' in message:
        logger.info("Importing nccl ...")
        if TF_version <= (1, 12):
            try:
                from tensorflow.contrib.nccl.python.ops.nccl_ops import _validate_and_load_nccl_so