# Assumed context for these TF1.x / tf.slim snippets (not shown in the
# original): import numpy as np; import tensorflow as tf;
# import tensorflow.contrib.slim as slim; inception_v2 from the TF-slim
# model zoo; plus module-level helpers and constants such as
# localization_net_alpha, transformer, NUM_TRANSFORMER, NUM_THETA_PARAMS,
# transformer_output_size, dropout_keep_prob, NUM_CLASSES, and FLAGS.
def network_fn(inputs):
    """Fine-grained classification with multiple spatial-transformer
    channels, each feeding an Inception-v2 feature extractor."""
    end_points = {}
    arg_scope = inception_v2.inception_v2_arg_scope(
        weight_decay=FLAGS.weight_decay)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                # Predict NUM_TRANSFORMER sets of theta parameters and split
                # them so each spatial transformer receives its own theta.
                transformer_theta = localization_net_alpha(
                    inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
                transformer_theta_split = tf.split(
                    transformer_theta, NUM_TRANSFORMER, axis=1)
                end_points['stn/localization/transformer_theta'] = transformer_theta

            transformer_outputs = []
            for theta in transformer_theta_split:
                transformer_outputs.append(
                    transformer(inputs, theta, transformer_output_size,
                                sampling_kernel='bilinear'))

            inception_outputs = []
            transformer_outputs_shape = [FLAGS.batch_size,
                                         transformer_output_size[0],
                                         transformer_output_size[1], 3]
            with tf.variable_scope('classification'):
                # Run each transformed crop through its own Inception-v2 path.
                for path_idx, inception_inputs in enumerate(transformer_outputs):
                    with tf.variable_scope('path_{}'.format(path_idx)):
                        inception_inputs.set_shape(transformer_outputs_shape)
                        net, _ = inception_v2.inception_v2_base(inception_inputs)
                        inception_outputs.append(net)
                # Concatenate the per-path features:
                # [batch, 7, 7, NUM_TRANSFORMER * 1024].
                multipath_outputs = tf.concat(inception_outputs, axis=-1)

                # Final fully connected layer producing the class logits.
                classification_logits = _inception_logits(
                    multipath_outputs, NUM_CLASSES, dropout_keep_prob)
                end_points['stn/classification/logits'] = classification_logits

    return classification_logits, end_points
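# Minimal usage sketch for network_fn above (hypothetical: the 224x224 input
# resolution, the sparse integer labels, and the function name are
# assumptions, not part of the original snippet).
def build_training_loss(labels):
    images = tf.placeholder(tf.float32, [FLAGS.batch_size, 224, 224, 3],
                            name='images')
    logits, end_points = network_fn(images)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    return images, loss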
Example #2
def transformer_inference(image):
    """Localization and spatial-transformer stage only, with batch norm and
    dropout frozen for inference. Relies on module-level num_transformer,
    transformed_height, and transformed_width."""
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        with slim.arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                            is_training=False):
            with tf.variable_scope('stn'):
                with tf.variable_scope('localization'):
                    transformer_theta = localization_net_alpha(
                        image, num_transformer, NUM_THETA_PARAMS)
                    transformer_theta_split = tf.split(transformer_theta,
                                                       num_transformer,
                                                       axis=1)

                transformer_outputs = []
                transformer_output_size = [
                    transformed_height, transformed_width
                ]
                for theta in transformer_theta_split:
                    transformer_outputs.append(
                        transformer(image,
                                    theta,
                                    transformer_output_size,
                                    sampling_kernel='bilinear'))

    return transformer_outputs
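# Hypothetical inference wiring for transformer_inference: restore a trained
# checkpoint and fetch the attended crops (fetch_crops and checkpoint_path
# are illustrative names, not from the original).
def fetch_crops(image_batch, checkpoint_path):
    crops = transformer_inference(image_batch)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        return sess.run(crops)  # list of num_transformer cropped batches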
Example #3
def stn_cnn_with_image_output(inputs, transformer_output_size, num_classes):
    """Returns the transformed image crops produced by the spatial-transformer
    stage of the fine-grained classification network.

    Note: num_classes is unused here, and weight_decay is assumed to be a
    module-level constant in the original source.
    """
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                transformer_theta = localization_net_beta(
                    inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
                transformer_theta_split = tf.split(transformer_theta,
                                                   NUM_TRANSFORMER,
                                                   axis=1)

            transformer_outputs = []
            for theta in transformer_theta_split:
                transformer_outputs.append(
                    transformer(inputs,
                                theta,
                                transformer_output_size,
                                sampling_kernel='bilinear'))

    return transformer_outputs
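# A small sketch for inspecting the crops returned by stn_cnn_with_image_output
# in TensorBoard; the summary tags and function name are illustrative, not
# from the original.
def add_crop_summaries(transformer_outputs):
    for idx, crop in enumerate(transformer_outputs):
        tf.summary.image('stn/crop_{}'.format(idx), crop, max_outputs=4)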
Example #4
def network_fn(inputs):
    """Multi-STN classifier: a localization network predicts one 4-parameter
    theta per spatial transformer, each transformed crop is run through its
    own Inception-v2 path, and the concatenated features yield both class
    logits and attribute logits."""
    end_points = {}
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
        with slim.arg_scope(inception_v3_arg_scope(weight_decay=weight_decay)):
            with tf.variable_scope("loc") as scope:
                with tf.variable_scope("net") as scope2:
                    loc_net, _ = inception_v2.inception_v2_base(inputs,
                                                                scope=scope2)
                # Project the localization features to 128 channels, then
                # collapse the spatial grid with a VALID convolution acting
                # as a fully connected layer.
                loc_net = slim.conv2d(loc_net, 128, [1, 1], scope='Loc_1x1')
                loc_net = slim.conv2d(loc_net,
                                      128,
                                      loc_net.get_shape()[1:3],
                                      padding='VALID',
                                      activation_fn=tf.nn.tanh,
                                      scope='Loc_fc1')
                loc_net = slim.flatten(loc_net)
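                # Zero-initialized weights plus a bias of iv = 4.0 on what are
                # presumably the scale slots start tanh(theta) near an
                # identity-like transform (tanh(4.0) ~ 0.9993), with zero
                # translation.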
                iv = 4.
                initial = np.array([iv, 0, iv, 0] * NUM_STN, dtype=np.float32)
                b_fc_loc = tf.get_variable(
                    "Loc_fc_b",
                    shape=[4 * NUM_STN],
                    initializer=init_ops.constant_initializer(initial),
                    dtype=dtypes.float32)
                W_fc_loc = tf.get_variable(
                    "Loc_fc_W",
                    shape=[128, 4 * NUM_STN],
                    initializer=init_ops.constant_initializer(
                        np.zeros((128, 4 * NUM_STN))),
                    dtype=dtypes.float32)
                theta = tf.nn.tanh(tf.matmul(loc_net, W_fc_loc) + b_fc_loc)
            _finals = []
            for i in range(NUM_STN):  # xrange in the original (Python 2)
                scope_name = "stn%d" % i
                with tf.variable_scope(scope_name) as scope1:
                    # Slice out this transformer's 4 theta parameters. The
                    # third argument of tf.slice is a size, not an end index,
                    # so it must be [-1, 4]; the original [-1, 4 * (i + 1)]
                    # over-reads and fails for i > 0.
                    _theta = tf.slice(theta, [0, 4 * i], [-1, 4])
                    stn_output_size = (STN_OUT_SIZE, STN_OUT_SIZE)
                    x = transformer(inputs, _theta, stn_output_size)
                    x.set_shape([
                        BATCH_PER_GPU, stn_output_size[0], stn_output_size[1],
                        3
                    ])
                    # NOTE: overwritten on each loop pass, so end_points['x']
                    # only keeps the last STN's crop.
                    end_points['x'] = x
                    with tf.variable_scope("net") as scope2:
                        net, _ = inception_v2.inception_v2_base(x,
                                                                scope=scope2)
                    kernel_size = _reduced_kernel_size_for_small_input(
                        net, [7, 7])
                    net = slim.avg_pool2d(net,
                                          kernel_size,
                                          padding='VALID',
                                          scope='AvgPool_1a')
                    net = slim.dropout(net, keep_prob=0.7, scope='Dropout_1b')
                    _finals.append(net)
            with tf.variable_scope('Logits'):
                net = tf.concat(axis=3, values=_finals)
                logits = slim.conv2d(net,
                                     NUM_CLASSES, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                predictions = slim.softmax(logits, scope='Predictions')
                end_points['Predictions'] = predictions

                logits_a = slim.conv2d(net,
                                       NUM_ATTRIBS, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_1c_1x1_a')
                logits_a = tf.squeeze(logits_a, [1, 2],
                                      name='SpatialSqueeze_a')
                # tf.contrib.slim has no sigmoid layer wrapper, so tf.sigmoid
                # is assumed here in place of the original slim.sigmoid.
                predictions_a = tf.sigmoid(logits_a, name='Predictions_a')
                end_points['Predictions_a'] = predictions_a
                return logits, logits_a, end_points
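# For reference, _reduced_kernel_size_for_small_input used above matches the
# helper shipped with the TF-slim Inception models: it shrinks a pooling
# kernel when the feature map is smaller than the requested default.
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
    shape = input_tensor.get_shape().as_list()
    if shape[1] is None or shape[2] is None:
        # Dynamic spatial dims: fall back to the requested kernel size.
        kernel_size_out = kernel_size
    else:
        kernel_size_out = [min(shape[1], kernel_size[0]),
                           min(shape[2], kernel_size[1])]
    return kernel_size_out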