Ejemplo n.º 1
0
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Args:
        feature (NCHW):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(Conv2D,
                  data_format='channels_first',
                  kernel_initializer=tfv1.variance_scaling_initializer(
                      scale=2.0,
                      mode='fan_out',
                      distribution='untruncated_normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k),
                       l,
                       cfg.FPN.FRCNN_CONV_HEAD_DIM,
                       3,
                       activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = FullyConnected(
            'fc',
            l,
            cfg.FPN.FRCNN_FC_HEAD_DIM,
            kernel_initializer=tfv1.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return l
Ejemplo n.º 2
0
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tfv1.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            if freeze or cfg.BACKBONE.NORM == 'FreezeBN':
                stack.enter_context(argscope(BatchNorm, training=False))
            else:
                stack.enter_context(
                    argscope(BatchNorm,
                             sync_statistics='nccl'
                             if cfg.TRAINER == 'replicated' else 'horovod'))

        if freeze:
            stack.enter_context(
                freeze_variables(stop_gradient=False, skip_collection=True))
        else:
            # the layers are not completely freezed, but we may want to only freeze the affine
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
Ejemplo n.º 3
0
def DepthConv(x, out_channel, kernel_shape, padding='SAME', stride=1,
              W_init=None, activation=tf.identity):
    in_shape = x.get_shape().as_list()
    in_channel = in_shape[1]
    assert out_channel % in_channel == 0, (out_channel, in_channel)
    channel_mult = out_channel // in_channel

    if W_init is None:
        W_init = tf.variance_scaling_initializer(2.0)
    kernel_shape = [kernel_shape, kernel_shape]
    filter_shape = kernel_shape + [in_channel, channel_mult]

    W = tf.get_variable('W', filter_shape, initializer=W_init)
    conv = tf.nn.depthwise_conv2d(x, W, [1, 1, stride, stride], padding=padding, data_format='NCHW')
    return activation(conv, name='output')
Ejemplo n.º 4
0
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        try:
            resize = tf.compat.v2.image.resize_images
            with tf.name_scope(name):
                shp2d = tf.shape(x)[2:]
                x = tf.transpose(x, [0, 2, 3, 1])
                x = resize(x, shp2d * 2, 'nearest')
                x = tf.transpose(x, [0, 3, 1, 2])
                return x
        except AttributeError:
            return FixedUnPooling(
                name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
                data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tfv1.variance_scaling_initializer(scale=1.)):
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        if use_gn:
            lat_2345 = [GroupNorm('gn_c{}'.format(i + 2), c) for i, c in enumerate(lat_2345)]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        if use_gn:
            p2345 = [GroupNorm('gn_p{}'.format(i + 2), c) for i, c in enumerate(p2345)]
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2, data_format='channels_first', padding='VALID')
        return p2345 + [p6]
Ejemplo n.º 5
0
 def get_logits(self, image):
     with argscope(Conv2D, kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
             argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'):
         logits = (
             LinearWrap(image).apply(convnormrelu, 'conv1_1',
                                     64).apply(convnormrelu, 'conv1_2',
                                               64).MaxPooling('pool1', 2)
             # 112
             .apply(convnormrelu, 'conv2_1',
                    128).apply(convnormrelu, 'conv2_2',
                               128).MaxPooling('pool2', 2)
             # 56
             .apply(convnormrelu, 'conv3_1',
                    256).apply(convnormrelu, 'conv3_2',
                               256).apply(convnormrelu, 'conv3_3',
                                          256).MaxPooling('pool3', 2)
             # 28
             .apply(convnormrelu, 'conv4_1',
                    512).apply(convnormrelu, 'conv4_2',
                               512).apply(convnormrelu, 'conv4_3',
                                          512).MaxPooling('pool4', 2)
             # 14
             .apply(convnormrelu, 'conv5_1',
                    512).apply(convnormrelu, 'conv5_2',
                               512).apply(convnormrelu, 'conv5_3',
                                          512).MaxPooling('pool5', 2)
             # 7
             .FullyConnected(
                 'fc6',
                 4096,
                 kernel_initializer=tf.random_normal_initializer(
                     stddev=0.001)).tf.nn.relu(name='fc6_relu').
             Dropout('drop0', rate=0.5).FullyConnected(
                 'fc7',
                 4096,
                 kernel_initializer=tf.random_normal_initializer(
                     stddev=0.001)).tf.nn.relu(name='fc7_relu').Dropout(
                         'drop1', rate=0.5).FullyConnected(
                             'fc8',
                             1000,
                             kernel_initializer=tf.
                             random_normal_initializer(stddev=0.01))())
     add_param_summary(('.*', ['histogram', 'rms']))
     return logits
Ejemplo n.º 6
0
def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D,
                  use_bias=False,
                  kernel_initializer=tfv1.variance_scaling_initializer(
                      scale=2.0, mode='fan_out')):
        # Note that TF pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected(
            'linear',
            l,
            1000,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
Ejemplo n.º 7
0
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=tfv1.variance_scaling_initializer(
                      scale=2.0,
                      mode='fan_out',
                      distribution='untruncated_normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k),
                       l,
                       cfg.MRCNN.HEAD_DIM,
                       3,
                       activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = Conv2DTranspose('deconv',
                            l,
                            cfg.MRCNN.HEAD_DIM,
                            2,
                            strides=2,
                            activation=tf.nn.relu)
        l = Conv2D(
            'conv',
            l,
            num_category,
            1,
            kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    return l
Ejemplo n.º 8
0
def fastrcnn_2fc_head(feature):
    """
    Args:
        feature (any shape):

    Returns:
        2D head feature
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tfv1.variance_scaling_initializer()
    hidden = FullyConnected('fc6',
                            feature,
                            dim,
                            kernel_initializer=init,
                            activation=tf.nn.relu)
    hidden = FullyConnected('fc7',
                            hidden,
                            dim,
                            kernel_initializer=init,
                            activation=tf.nn.relu)
    return hidden
Ejemplo n.º 9
0
    def get_logits(self, image):
        gauss_init = tf.random_normal_initializer(stddev=0.01)
        with argscope(Conv2D,
                      kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
                argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
                argscope([Conv2D, MaxPooling], data_format='channels_last'):
            # necessary padding to get 55x55 after conv1
            image = tf.pad(image, [[0, 0], [2, 2], [2, 2], [0, 0]])
            l = Conv2D('conv1',
                       image,
                       filters=96,
                       kernel_size=11,
                       strides=4,
                       padding='VALID')
            # size: 55
            visualize_conv1_weights(l.variables.W)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
            l = MaxPooling('pool1', l, 3, strides=2, padding='VALID')
            # 27
            l = Conv2D('conv2', l, filters=256, kernel_size=5, split=2)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
            l = MaxPooling('pool2', l, 3, strides=2, padding='VALID')
            # 13
            l = Conv2D('conv3', l, filters=384, kernel_size=3)
            l = Conv2D('conv4', l, filters=384, kernel_size=3, split=2)
            l = Conv2D('conv5', l, filters=256, kernel_size=3, split=2)
            l = MaxPooling('pool3', l, 3, strides=2, padding='VALID')

            l = FullyConnected('fc6',
                               l,
                               4096,
                               kernel_initializer=gauss_init,
                               bias_initializer=tf.ones_initializer())
            l = Dropout(l, rate=0.5)
            l = FullyConnected('fc7', l, 4096, kernel_initializer=gauss_init)
            l = Dropout(l, rate=0.5)
        logits = FullyConnected('fc8', l, 1000, kernel_initializer=gauss_init)
        return logits