def network_fn(inputs):
    """Build the multi-path STN + Inception-v2 classification graph.

    The localization net predicts the transformer thetas, which are split
    into ``NUM_TRANSFORMER`` pieces along axis 1. Each piece is passed with
    ``inputs`` to ``transformer``; every transformer output then runs through
    its own Inception-v2 base (one variable scope per path) and the resulting
    feature maps are concatenated on the last axis before the logits layer.

    NOTE(review): scope nesting was reconstructed from flattened source;
    confirm that 'classification' lives under 'stn' as the end-point key
    'stn/classification/logits' suggests.

    Args:
        inputs: Image batch fed to the localization net and the transformers.

    Returns:
        A ``(classification_logits, end_points)`` tuple. ``end_points`` maps
        'stn/localization/transformer_theta' and 'stn/classification/logits'
        to the corresponding tensors.
    """
    end_points = {}

    # Static shape pinned on every transformer output before the Inception base.
    crop_shape = [
        FLAGS.batch_size,
        transformer_output_size[0],
        transformer_output_size[1],
        3,
    ]

    inception_scope = inception_v2.inception_v2_arg_scope(
        weight_decay=FLAGS.weight_decay)
    with slim.arg_scope(inception_scope), tf.variable_scope('stn'):
        with tf.variable_scope('localization'):
            thetas = localization_net_alpha(
                inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
            per_path_thetas = tf.split(thetas, NUM_TRANSFORMER, axis=1)
            end_points['stn/localization/transformer_theta'] = thetas

        # One transformer output per theta slice.
        crops = [
            transformer(inputs, path_theta, transformer_output_size,
                        sampling_kernel='bilinear')
            for path_theta in per_path_thetas
        ]

        with tf.variable_scope('classification'):
            path_features = []
            for idx, crop in enumerate(crops):
                with tf.variable_scope('path_{}'.format(idx)):
                    crop.set_shape(crop_shape)
                    trunk, _ = inception_v2.inception_v2_base(crop)
                    path_features.append(trunk)

            # Per the original note, the concatenated end points are
            # num_batch * 7 * 7 * (num_transformer * 1024).
            merged = tf.concat(path_features, axis=-1)

            # Final fully connected logits layer.
            logits = _inception_logits(merged, NUM_CLASSES, dropout_keep_prob)
            end_points['stn/classification/logits'] = logits

    return logits, end_points
def transformer_inference(image):
    """Build the localization + transformer graph in inference mode.

    Batch norm and dropout are configured with ``is_training=False`` and the
    Inception arg scope is created with a weight decay of 0.0.

    Args:
        image: Image tensor fed to the localization net and the transformers.

    Returns:
        A list holding one ``transformer`` output per theta slice
        (``num_transformer`` slices along axis 1).
    """
    output_size = [transformed_height, transformed_width]

    with slim.arg_scope(inception_v2.inception_v2_arg_scope(weight_decay=0.0)), \
            slim.arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                           is_training=False), \
            tf.variable_scope('stn'):
        with tf.variable_scope('localization'):
            thetas = localization_net_alpha(
                image, num_transformer, NUM_THETA_PARAMS)
            per_path_thetas = tf.split(thetas, num_transformer, axis=1)

        warped = [
            transformer(image, path_theta, output_size,
                        sampling_kernel='bilinear')
            for path_theta in per_path_thetas
        ]

    return warped
def stn_cnn_with_image_output(inputs, transformer_output_size, num_classes):
    """Build the localization + transformer stage and return its images.

    Only the spatial-transformer part of the network is constructed here: the
    thetas come from ``localization_net_beta`` and are split into
    ``NUM_TRANSFORMER`` pieces along axis 1, one per ``transformer`` call.

    Args:
        inputs: Image batch fed to the localization net and the transformers.
        transformer_output_size: Output size forwarded to ``transformer``.
        num_classes: Accepted for interface compatibility; not used here.

    Returns:
        A list with one ``transformer`` output per theta slice.
    """
    inception_scope = inception_v2.inception_v2_arg_scope(
        weight_decay=weight_decay)
    with slim.arg_scope(inception_scope), tf.variable_scope('stn'):
        with tf.variable_scope('localization'):
            thetas = localization_net_beta(
                inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
            per_path_thetas = tf.split(thetas, NUM_TRANSFORMER, axis=1)

        warped = [
            transformer(inputs, path_theta, transformer_output_size,
                        sampling_kernel='bilinear')
            for path_theta in per_path_thetas
        ]

    return warped
def network_fn(inputs):
    """Build a multi-STN Inception-v2 network with class and attribute heads.

    A localization branch (Inception-v2 base followed by two convolutions and
    a linear layer with tanh) predicts 4 transform parameters per spatial
    transformer. Each of the ``NUM_STN`` transformers resamples ``inputs``
    with its own 4-parameter slice, and the result goes through a separate
    Inception-v2 base, average pooling and dropout. The pooled features of
    all transformers are concatenated on axis 3 and feed two 1x1-conv heads:
    class logits (``NUM_CLASSES``) and attribute logits (``NUM_ATTRIBS``).

    NOTE(review): variable-scope nesting was reconstructed from flattened
    source; confirm it against existing checkpoints before restoring weights.

    Args:
        inputs: Image batch fed to the localization net and the transformers.

    Returns:
        A ``(logits, logits_a, end_points)`` tuple. ``end_points`` contains
        'x' (the last transformer output), 'Predictions' (softmax over the
        class logits) and 'Predictions_a' (sigmoid over attribute logits).
    """
    end_points = {}
    stn_output_size = (STN_OUT_SIZE, STN_OUT_SIZE)

    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
        with slim.arg_scope(inception_v3_arg_scope(weight_decay=weight_decay)):
            with tf.variable_scope("loc"):
                with tf.variable_scope("net") as scope2:
                    loc_net, _ = inception_v2.inception_v2_base(
                        inputs, scope=scope2)
                loc_net = slim.conv2d(loc_net, 128, [1, 1], scope='Loc_1x1')
                # A VALID conv whose kernel covers the whole feature map
                # collapses the spatial dimensions to 1x1.
                loc_net = slim.conv2d(loc_net, 128,
                                      loc_net.get_shape()[1:3],
                                      padding='VALID',
                                      activation_fn=tf.nn.tanh,
                                      scope='Loc_fc1')
                loc_net = slim.flatten(loc_net)

                # Zero weights plus a bias of [4, 0, 4, 0] per STN make theta
                # start at tanh(4) ~= 1 in slots 0 and 2 and at 0 elsewhere
                # (presumably a near-identity transform -- confirm against
                # `transformer`).
                iv = 4.
                initial = np.array([iv, 0, iv, 0] * NUM_STN, dtype=np.float32)
                b_fc_loc = tf.get_variable(
                    "Loc_fc_b",
                    shape=[4 * NUM_STN],
                    initializer=init_ops.constant_initializer(initial),
                    dtype=dtypes.float32)
                W_fc_loc = tf.get_variable(
                    "Loc_fc_W",
                    shape=[128, 4 * NUM_STN],
                    initializer=init_ops.constant_initializer(
                        np.zeros((128, 4 * NUM_STN))),
                    dtype=dtypes.float32)
                theta = tf.nn.tanh(tf.matmul(loc_net, W_fc_loc) + b_fc_loc)

            _finals = []
            for i in range(NUM_STN):
                with tf.variable_scope("stn%d" % i):
                    # tf.slice takes (begin, size), not (begin, end): every
                    # STN owns exactly 4 consecutive columns of theta.
                    _theta = tf.slice(theta, [0, 4 * i], [-1, 4])
                    x = transformer(inputs, _theta, stn_output_size)
                    x.set_shape([
                        BATCH_PER_GPU, stn_output_size[0],
                        stn_output_size[1], 3
                    ])
                    # Overwritten on every iteration, so only the last
                    # transformer's output remains in end_points.
                    end_points['x'] = x
                    with tf.variable_scope("net") as scope2:
                        net, _ = inception_v2.inception_v2_base(
                            x, scope=scope2)
                        kernel_size = _reduced_kernel_size_for_small_input(
                            net, [7, 7])
                        net = slim.avg_pool2d(net, kernel_size,
                                              padding='VALID',
                                              scope='AvgPool_1a')
                        net = slim.dropout(net, keep_prob=0.7,
                                           scope='Dropout_1b')
                    _finals.append(net)

            with tf.variable_scope('Logits'):
                net = tf.concat(axis=3, values=_finals)

                # Class head.
                logits = slim.conv2d(net, NUM_CLASSES, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                predictions = slim.softmax(logits, scope='Predictions')
                end_points['Predictions'] = predictions

                # Attribute head.
                logits_a = slim.conv2d(net, NUM_ATTRIBS, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_1c_1x1_a')
                logits_a = tf.squeeze(logits_a, [1, 2],
                                      name='SpatialSqueeze_a')
                # NOTE(review): confirm `slim.sigmoid` exists in the slim
                # build this file imports.
                predictions_a = slim.sigmoid(logits_a, scope='Predictions_a')
                end_points['Predictions_a'] = predictions_a

    return logits, logits_a, end_points