Exemple #1
0
def phi_subnet(x, is_training, upsample):
    """
    Apply two conv + batch-norm/relu stages, then enlarge the
    result spatially with bilinear interpolation.

    Arguments:
        x: a float tensor with shape [b, h, w, c]
            (or [b, c, h, w] when DATA_FORMAT is 'channels_first').
        is_training: a boolean.
        upsample: an integer, the spatial scaling factor.
    Returns:
        a float tensor with shape [b, upsample * h, upsample * w, depth],
        given in the same layout as the input.
    """

    stages = (('conv1', 'bn1'), ('conv2', 'bn2'))
    for conv_name, bn_name in stages:
        x = conv2d_same(x, DEPTH, kernel_size=3, name=conv_name)
        x = batch_norm_relu(x, is_training, name=bn_name)

    channels_first = DATA_FORMAT == 'channels_first'

    # the resize below reads height and width from axes 1 and 2,
    # so a channels-first tensor is moved to channels-last beforehand
    if channels_first:
        x = tf.transpose(x, [0, 2, 3, 1])

    # the spatial size is taken from the dynamic shape
    dynamic_shape = tf.shape(x)
    height = dynamic_shape[1]
    width = dynamic_shape[2]
    x = tf.image.resize_bilinear(x, [upsample * height, upsample * width])

    # restore the layout the caller passed in
    if channels_first:
        x = tf.transpose(x, [0, 3, 1, 2])

    return x
def class_net(x, is_training, depth, level, num_anchors_per_location):
    """
    Classification head: four conv + batch-norm/relu stages
    followed by a 3x3 convolution that outputs the logits.

    Arguments:
        x: a float tensor with shape [batch_size, depth, height, width].
        is_training: a boolean.
        depth, level, num_anchors_per_location: integers.
            `level` is only used to build the batch norm layer names.
    Returns:
        a float tensor with shape [batch_size, num_anchors_per_location, height, width].
    """
    import math

    for stage in range(4):
        conv_name = 'conv3x3_%d' % stage
        bn_name = 'batch_norm_%d_for_level_%d' % (stage, level)
        x = conv2d_same(x, depth, kernel_size=3, name=conv_name)
        x = batch_norm_relu(x, is_training, name=bn_name)

    # The bias is chosen so that the initial sigmoid output equals
    # the foreground probability: sigmoid(-log((1 - p) / p)) = p.
    foreground_probability = 0.01
    prior_bias = -math.log((1.0 - foreground_probability) / foreground_probability)

    bias_init = tf.constant_initializer(prior_bias)
    kernel_init = tf.random_normal_initializer(stddev=0.01)

    return tf.layers.conv2d(
        x,
        num_anchors_per_location,
        kernel_size=(3, 3),
        padding='same',
        bias_initializer=bias_init,
        kernel_initializer=kernel_init,
        data_format=DATA_FORMAT,
        name='logits')
def box_net(x, is_training, depth, level, num_anchors_per_location):
    """
    Box regression head: four conv + batch-norm/relu stages
    followed by a 3x3 convolution that outputs the encoded boxes.

    Arguments:
        x: a float tensor with shape [batch_size, depth, height, width].
        is_training: a boolean.
        depth, level, num_anchors_per_location: integers.
            `level` is only used to build the batch norm layer names.
    Returns:
        a float tensor with shape [batch_size, 4 * num_anchors_per_location, height, width].
    """

    for stage in range(4):
        conv_name = 'conv3x3_%d' % stage
        bn_name = 'batch_norm_%d_for_level_%d' % (stage, level)
        x = conv2d_same(x, depth, kernel_size=3, name=conv_name)
        x = batch_norm_relu(x, is_training, name=bn_name)

    # four output channels per anchor
    num_outputs = 4 * num_anchors_per_location

    bias_init = tf.zeros_initializer()
    kernel_init = tf.random_normal_initializer(stddev=0.01)

    return tf.layers.conv2d(
        x,
        num_outputs,
        kernel_size=(3, 3),
        padding='same',
        bias_initializer=bias_init,
        kernel_initializer=kernel_init,
        data_format=DATA_FORMAT,
        name='encoded_boxes')
Exemple #4
0
def feature_pyramid_network(features,
                            is_training,
                            depth,
                            min_level=3,
                            add_coarse_features=True,
                            scope='fpn'):
    """
    Build a feature pyramid on top of backbone features.

    For person detector subnetwork we
    use min_level=3 and add_coarse_features=True
    (like in the original retinanet paper).

    For keypoint detector subnetwork we
    use min_level=2 and add_coarse_features=False
    (like in the original multiposenet paper).

    Arguments:
        features: a dict with four float tensors.
            It must have keys ['c2', 'c3', 'c4', 'c5'].
            Where a number in a name means that
            a feature has stride `2 ** number`.
        is_training: a boolean.
        depth: an integer.
        min_level: an integer, minimal feature stride
            that will be used is `2 ** min_level`.
            Possible values are [2, 3, 4, 5]
        add_coarse_features: a boolean, whether to add
            features with strides 64 and 128.
        scope: a string.
    Returns:
        a dict with float tensors, with keys of the form 'p{level}':
        'p5' is always present, 'p6' and 'p7' are present when
        `add_coarse_features` is true, and one entry is added
        for each level from 4 down to `min_level`.
    """

    with tf.variable_scope(scope):

        # the coarsest backbone level starts the top-down path
        top_down = conv2d_same(features['c5'], depth, kernel_size=1, name='lateral5')
        enriched_features = {
            'p5': conv2d_same(top_down, depth, kernel_size=3, name='p5')
        }

        if add_coarse_features:
            # two extra levels, each produced by a stride 2 convolution
            p6 = conv2d_same(
                features['c5'], depth,
                kernel_size=3, stride=2, name='p6')
            activated_p6 = batch_norm_relu(p6, is_training, name='pre_p7_bn')
            p7 = conv2d_same(
                activated_p6, depth,
                kernel_size=3, stride=2, name='p7')
            enriched_features['p6'] = p6
            enriched_features['p7'] = p7

        # top-down path: walk from level 4 down to `min_level`,
        # merging the upsampled coarser map with a lateral connection
        for level in range(4, min_level - 1, -1):
            lateral = conv2d_same(
                features[f'c{level}'], depth,
                kernel_size=1, name=f'lateral{level}')
            top_down = nearest_neighbor_upsample(top_down) + lateral
            enriched_features[f'p{level}'] = conv2d_same(
                top_down, depth, kernel_size=3, name=f'p{level}')

    return enriched_features
Exemple #5
0
    def __init__(self, backbone_features, is_training, params):
        """
        Build the keypoint heatmap subnetwork on top of backbone features.

        Arguments:
            backbone_features: a dict with float tensors.
                It contains keys ['c2', 'c3', 'c4', 'c5'].
            is_training: a boolean.
            params: a dict (it is not read by this constructor).

        After construction the object has two attributes:
            enriched_features: a dict with keys ['p2', 'p3', 'p4', 'p5'],
                the outputs of the feature pyramid network.
            heatmaps: a float tensor with NUM_KEYPOINTS + 1 channels.
        Both are transposed to channels-last layout
        when DATA_FORMAT is 'channels_first'.
        """
        import math

        self.enriched_features = feature_pyramid_network(
            backbone_features,
            is_training,
            depth=DEPTH,
            min_level=2,
            add_coarse_features=False,
            scope='keypoint_fpn')
        normalized_enriched_features = {
            n: batch_norm_relu(x, is_training, name=f'{n}_batch_norm')
            for n, x in self.enriched_features.items()
        }
        # it is a dict with keys ['p2', 'p3', 'p4', 'p5']

        # Level `l` is upsampled by 2 ** (l - 2), so level 2
        # keeps its size and the coarser levels are enlarged more.
        upsampled_features = []
        for level in range(2, 6):
            with tf.variable_scope(f'phi_subnet_{level}'):
                x = normalized_enriched_features[f'p{level}']
                y = phi_subnet(x, is_training, upsample=2**(level - 2))
                upsampled_features.append(y)

        # concatenate along the channel axis
        upsampled_features = tf.concat(
            upsampled_features,
            axis=1 if DATA_FORMAT == 'channels_first' else 3)
        x = conv2d_same(upsampled_features,
                        64,
                        kernel_size=3,
                        name='final_conv3x3')
        x = batch_norm_relu(x, is_training, name='final_bn')

        p = 0.01  # probability of a keypoint
        # sigmoid(-log((1 - p) / p)) = p
        value = -math.log((1.0 - p) / p)

        # One prior bias per keypoint channel plus a zero bias for the
        # one extra channel. The list length is derived from
        # NUM_KEYPOINTS so that it always matches the number of
        # filters of the layer below (it used to be hard-coded to 17).
        keypoints_bias = NUM_KEYPOINTS * [value]
        bias_initializer = tf.constant_initializer(keypoints_bias + [0.0])

        self.heatmaps = tf.layers.conv2d(
            x,
            NUM_KEYPOINTS + 1,
            kernel_size=1,
            padding='same',
            bias_initializer=bias_initializer,
            kernel_initializer=tf.random_normal_initializer(stddev=1e-4),
            data_format=DATA_FORMAT,
            name='heatmaps')

        if DATA_FORMAT == 'channels_first':

            # expose channels-last tensors regardless of the internal layout
            self.heatmaps = tf.transpose(self.heatmaps, [0, 2, 3, 1])
            self.enriched_features = {
                n: tf.transpose(x, [0, 2, 3, 1])
                for n, x in self.enriched_features.items()
            }