Example #1
import tensorflow as tf
from tensorflow.contrib import slim
# Assumed import path: inception_v2 from the TF-slim model zoo
# (tensorflow/models, research/slim/nets).
from nets import inception_v2


def localization_net_alpha(inputs, num_transformer, num_theta_params):
    """
    Use inception_v2 as the localization network of a spatial transformer.
    """
    # outputs a 7x7x1024 feature map: the default final_endpoint='Mixed_5c',
    # taken just before the fully connected layers
    with tf.variable_scope('inception_net'):
        net, _ = inception_v2.inception_v2_base(inputs)

    # fully connected layers implemented as convolutions: a 1x1 conv, then a
    # VALID conv over the remaining spatial extent, producing a 1x1x128 output
    with tf.variable_scope('logits'):
        net = slim.conv2d(net, 128, [1, 1], scope='conv2d_a_1x1')
        kernel_size = inception_v2._reduced_kernel_size_for_small_input(
            net, [7, 7])
        net = slim.conv2d(net,
                          128,
                          kernel_size,
                          padding='VALID',
                          scope='conv2d_b_{}x{}'.format(*kernel_size))
        # Note: this 4-element bias pattern assumes num_theta_params == 4.
        # With tanh outputs, a bias of 2.0 starts those thetas near
        # tanh(2.0) ~= 0.96, while the zero-biased ones start at 0.
        init_bias = tf.constant_initializer([2.0, .0, 2.0, .0] *
                                            num_transformer)
        logits = slim.conv2d(
            net,
            num_transformer * num_theta_params, [1, 1],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            biases_initializer=init_bias,
            normalizer_fn=None,
            activation_fn=tf.nn.tanh,
            scope='conv2d_c_1x1')

        return tf.squeeze(logits, [1, 2])
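
A minimal usage sketch, assuming 224x224 RGB inputs, two transformers with four theta parameters each, and the standard slim arg scope; none of these values come from the snippet itself:

# Hypothetical call site for localization_net_alpha (TF 1.x graph mode).
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
    # two transformers with 4 theta parameters each -> a [batch, 8] tensor
    thetas = localization_net_alpha(images, num_transformer=2,
                                    num_theta_params=4)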
Example #2
def _inception_logits(inputs, num_outputs, dropout_keep_prob, activ_fn=None):
    with tf.variable_scope('logits'):
        kernel_size = inception_v2._reduced_kernel_size_for_small_input(inputs, [7, 7])
        # global average pooling: collapses the spatial dims to 1x1
        net = slim.avg_pool2d(inputs, kernel_size, padding='VALID')
        # dropout before the final fully connected layer
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='dropout')
        # 1x1 convolution acting as a fully connected layer
        logits = slim.conv2d(net, num_outputs, [1, 1], normalizer_fn=None, activation_fn=activ_fn,
                             scope='conv2d_a_1x1')

    return tf.squeeze(logits, [1, 2])
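
A hedged sketch of a possible call site for this helper; the feature extractor, class count, and keep probability below are assumptions, not part of the snippet:

# Hypothetical: classification logits on top of inception_v2 features.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
    features, _ = inception_v2.inception_v2_base(images)
# e.g. 10-way logits, keeping 80% of activations during dropout
logits = _inception_logits(features, num_outputs=10, dropout_keep_prob=0.8)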
Example #3
def localization_net_beta(inputs, num_transformer, num_theta_params):
    """
    Two-branch variant of the spatial transformer localization network.
    """
    with tf.variable_scope('inception_net'):
        net, _ = inception_v2.inception_v2_base(inputs)
    with tf.variable_scope('logits'):
        with tf.variable_scope('branch_0'):
            branch0 = slim.conv2d(net, 128, [1, 1], scope='conv2d_a_1x1')
            branch0 = slim.conv2d(branch0,
                                  144, [3, 3],
                                  stride=2,
                                  scope='conv2d_b_3x3')
        with tf.variable_scope('branch_1'):
            branch1 = slim.conv2d(net, 144, [1, 1], scope='conv2d_a_1x1')
            branch1 = slim.max_pool2d(branch1, [3, 3],
                                      stride=2,
                                      padding='SAME',
                                      scope='max_pool_b_3x3')
        # merge the two branches along the channel dimension
        net = tf.concat([branch0, branch1], axis=-1)

        kernel_size = inception_v2._reduced_kernel_size_for_small_input(
            net, [7, 7])
        net = slim.avg_pool2d(net,
                              kernel_size,
                              padding='VALID',
                              scope='avg_pool_a_{}x{}'.format(*kernel_size))
        # As in the alpha variant, the 4-element bias pattern assumes
        # num_theta_params == 4; tanh(2.0) ~= 0.96 starts those thetas near 1
        # while the zero-biased ones start at 0.
        init_bias = tf.constant_initializer([2.0, .0, 2.0, .0] *
                                            num_transformer)
        logits = slim.conv2d(
            net,
            num_transformer * num_theta_params, [1, 1],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            biases_initializer=init_bias,
            normalizer_fn=None,
            activation_fn=tf.nn.tanh,
            scope='conv2d_b_1x1')

    return tf.squeeze(logits, [1, 2])
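
As with the alpha variant, the flat [batch, num_transformer * num_theta_params] output would typically be split per transformer before feeding a spatial transformer's grid generator; the reshape below is an assumption about downstream use, not something the snippet prescribes:

# Hypothetical usage sketch: predict thetas and reshape per transformer.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
    thetas = localization_net_beta(images, num_transformer=2,
                                   num_theta_params=4)
# one 4-element theta vector per transformer: [batch, 2, 4]
thetas = tf.reshape(thetas, [-1, 2, 4])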