# Common imports assumed by the snippets in this section (TF 1.x-era code);
# the original examples omit them.
import numpy as np
import tensorflow as tf

from nets import inception_v4  # TF-slim models repository.

slim = tf.contrib.slim


# `number_of_classes` is assumed to be defined elsewhere in the source module.
def cam_inception(inputs, num_classes=number_of_classes, is_training=True,
                  reuse=None, delta=0.6):
  with tf.variable_scope('InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4.inception_v4_base(inputs, scope=scope)
      inception_c_feature = net
      with tf.variable_scope('cam_classifier/A'):
        net = slim.conv2d(
            inception_c_feature, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            padding='SAME', scope='conv1_3x3')
        net = slim.conv2d(
            net, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            padding='SAME', scope='conv2_3x3')
        net = slim.conv2d(
            net, num_classes, [1, 1], activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            scope='conv3_1x1')
        end_points['features_A'] = net
        # Global average pooling (GAP).
        kernel_size = net.get_shape()[1:3]
        if kernel_size.is_fully_defined():
          net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                scope='AvgPool_1a')
        else:
          net = tf.reduce_mean(net, [1, 2], keep_dims=True,
                               name='global_pool')
        logits = slim.flatten(net, scope='Flatten')
        end_points['Logits'] = logits
        end_points['Predictions_A'] = tf.argmax(logits, 1,
                                                name='Predictions_A')
  return logits, end_points
def testBuildOnlyUpToFinalEndpoint(self):
  batch_size = 5
  height, width = 299, 299
  all_endpoints = [
      'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
      'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_5e',
      'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_6f',
      'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'
  ]
  for index, endpoint in enumerate(all_endpoints):
    with tf.Graph().as_default():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      out_tensor, end_points = inception_v4.inception_v4_base(
          inputs, final_endpoint=endpoint)
      self.assertTrue(
          out_tensor.op.name.startswith('InceptionV4/' + endpoint))
      self.assertItemsEqual(all_endpoints[:index + 1], end_points)
def testBuildBaseNetwork(self):
  batch_size = 5
  height, width = 299, 299
  inputs = tf.random_uniform((batch_size, height, width, 3))
  net, end_points = inception_v4.inception_v4_base(inputs)
  self.assertTrue(net.op.name.startswith('InceptionV4/Mixed_7d'))
  self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])
  expected_endpoints = [
      'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
      'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_5e',
      'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_6f',
      'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'
  ]
  self.assertItemsEqual(end_points.keys(), expected_endpoints)
  # `items()` instead of the Python 2-only `iteritems()`.
  for name, op in end_points.items():
    self.assertTrue(op.name.startswith('InceptionV4/' + name))
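# For context: in the TF-slim models repository the two test methods above sit
# inside a tf.test.TestCase subclass and run via tf.test.main(). A minimal
# standalone smoke check along the same lines (the function name here is ours,
# not the repository's):
def smoke_test_inception_v4_base():
  with tf.Graph().as_default():
    inputs = tf.random_uniform((5, 299, 299, 3))
    net, end_points = inception_v4.inception_v4_base(inputs)
    assert net.get_shape().as_list() == [5, 8, 8, 1536]
    assert 'Mixed_7d' in end_points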
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  feature_map_layout = {
      'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
      'layer_depth': [-1, -1, -1, 512, 256, 128],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with slim.arg_scope(self._conv_hyperparams_fn()):
    with tf.variable_scope('InceptionV4',
                           reuse=self._reuse_weights) as scope:
      _, image_features = inception_v4.inception_v4_base(
          ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
          final_endpoint='Mixed_7d',
          scope=scope)
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  return feature_maps.values()
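# Hedged usage sketch for the SSD feature-extractor method above. The class
# name and constructor are assumptions modeled on the object_detection API;
# only preprocess()/extract_features() follow from the code itself:
#
#   extractor = SSDInceptionV4FeatureExtractor(...)  # hypothetical name
#   images = tf.placeholder(tf.float32, [None, 300, 300, 3])
#   feature_maps = extractor.extract_features(extractor.preprocess(images))
#   # feature_maps[0] comes from Mixed_5d, feature_maps[1] from Mixed_6e, etc.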
def inception_v4_mod(images,
                     trainable=True,
                     is_training=True,
                     weight_decay=0.00004,
                     stddev=0.1,
                     dropout_keep_prob=0.8,
                     use_batch_norm=True,
                     batch_norm_params=None,
                     add_summaries=True,
                     scope="InceptionV4"):
  """Builds an Inception V4 subgraph for image embeddings.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the inception submodel should be trainable or not.
    is_training: Boolean indicating training mode or not.
    weight_decay: Coefficient for weight regularization.
    stddev: The standard deviation of the truncated normal weight initializer.
    dropout_keep_prob: Dropout keep probability.
    use_batch_norm: Whether to use batch normalization.
    batch_norm_params: Parameters for batch normalization. See
      tf.contrib.layers.batch_norm for details.
    add_summaries: Whether to add activation summaries.
    scope: Optional Variable scope.

  Returns:
    net: A float32 Tensor containing the flattened image embeddings.
  """
  # Only consider the inception model to be in training mode if it's
  # trainable.
  is_inception_model_training = trainable and is_training

  if use_batch_norm:
    # Default parameters for batch normalization.
    if not batch_norm_params:
      batch_norm_params = {
          "is_training": is_inception_model_training,
          "trainable": trainable,
          # Decay for the moving averages.
          "decay": 0.9997,
          # Epsilon to prevent 0s in variance.
          "epsilon": 0.001,
          # Collection containing the moving mean and moving variance.
          "variables_collections": {
              "beta": None,
              "gamma": None,
              "moving_mean": ["moving_vars"],
              "moving_variance": ["moving_vars"],
          }
      }
  else:
    batch_norm_params = None

  if trainable:
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  with tf.variable_scope(scope, "InceptionV4", [images]) as scope:
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable):
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          normalizer_params=batch_norm_params):
        net, end_points = inception_v4_base(images, scope=scope)
        with tf.variable_scope("logits"):
          shape = net.get_shape()
          net = slim.avg_pool2d(net, shape[1:3], padding="VALID",
                                scope="pool")
          net = slim.dropout(
              net,
              keep_prob=dropout_keep_prob,
              is_training=is_inception_model_training,
              scope="dropout")
          net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for v in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(v)

  return net
def fcn_inception(inputs, num_classes, is_training=None, scope=None):
  # FCN-style decoder: repeatedly upsample with transposed convolutions and
  # fuse with progressively earlier Inception V4 endpoints.
  net, end_points = inception_v4.inception_v4_base(inputs)
  layer1_out = end_points['Conv2d_1a_3x3']
  layer2_out = end_points['Conv2d_2b_3x3']
  layer3_out = end_points['Mixed_3a']
  layer4_out = end_points['Mixed_4a']
  layer5_out = end_points['Mixed_5e']
  layer6_out = end_points['Mixed_6h']
  layer7_out = end_points['Mixed_7d']

  with tf.variable_scope(scope, 'fcn_inception'):
    deconv7 = slim.conv2d_transpose(layer7_out, 1024, 4, 2, 'VALID',
                                    scope='deconv7')
    end_points["fcn_inception/deconv7"] = deconv7
    add6 = tf.add(deconv7, layer6_out, name='add6')
    deconv6 = slim.conv2d_transpose(add6, 384, 3, 2, 'VALID', scope='deconv6')
    end_points["deconv6"] = deconv6
    add5 = tf.add(deconv6, layer5_out, name='add5')
    deconv5 = slim.conv2d_transpose(add5, 192, 4, 2, 'VALID', scope='deconv5')
    end_points["deconv5"] = deconv5
    add4 = tf.add(deconv5, layer4_out, name='add4')
    deconv4 = slim.conv2d_transpose(add4, 160, 3, 1, 'VALID', scope='deconv4')
    end_points["deconv4"] = deconv4
    add3 = tf.add(deconv4, layer3_out, name='add3')
    deconv3 = slim.conv2d_transpose(add3, 64, 3, 2, 'VALID', scope='deconv3')
    end_points["deconv3"] = deconv3
    add2 = tf.add(deconv3, layer2_out, name='add2')
    deconv2 = slim.conv2d_transpose(add2, 32, 3, 1, 'VALID', scope='deconv2')
    end_points["deconv2"] = deconv2
    add1 = tf.add(deconv2, layer1_out, name='add1')
    deconv1 = slim.conv2d_transpose(add1, num_classes, 4, 2, 'VALID',
                                    scope='deconv1')
    end_points["deconv1"] = deconv1
    # One row of logits per output pixel.
    logits = tf.reshape(deconv1, [-1, num_classes], name='logits')

  return logits, end_points
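# Usage sketch for fcn_inception. Because the logits are flattened to
# [-1, num_classes], per-pixel labels come from an argmax over axis 1; the
# input size and class count here are illustrative assumptions:
#
#   images = tf.placeholder(tf.float32, [1, 299, 299, 3])
#   logits, end_points = fcn_inception(images, num_classes=21)
#   pixel_labels = tf.argmax(logits, axis=1)  # one label per output pixel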
def cam_inception(inputs, num_classes=number_of_classes, is_training=True,
                  reuse=None, delta=0.8):
  with tf.variable_scope('InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4.inception_v4_base(inputs, scope=scope)
      inception_c_feature = net

      # Classifier A: a plain CAM head on top of the Inception-C features.
      with tf.variable_scope('cam_classifier/A'):
        net = slim.conv2d(
            inception_c_feature, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME', scope='conv1_3x3')
        net = slim.conv2d(
            net, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME', scope='conv2_3x3')
        net = slim.conv2d(
            net, num_classes, [1, 1], activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            scope='conv3_1x1')
        end_points['features_A'] = net
        # Global average pooling (GAP).
        kernel_size = net.get_shape()[1:3]
        if kernel_size.is_fully_defined():
          net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                scope='AvgPool_1a')
        else:
          net = tf.reduce_mean(net, [1, 2], keep_dims=True,
                               name='global_pool')
        logits = slim.flatten(net, scope='Flatten')
        end_points['Logits'] = logits
        end_points['Predictions_A'] = tf.argmax(logits, 1,
                                                name='Predictions_A')

      # Classifier B: erase the regions where classifier A's CAM exceeds
      # `delta` and classify the remaining evidence.
      with tf.variable_scope('cam_classifier/B'):
        batch_size = inception_c_feature.get_shape()[0]
        channels = inception_c_feature.get_shape()[3]
        # Per example: take the CAM of the predicted class, min-max normalize
        # it, and tile it across all feature channels.
        for n in range(batch_size):
          ca_map = end_points['features_A'][n, :, :,
                                            end_points['Predictions_A'][n]]
          ca_map = (ca_map - tf.reduce_min(ca_map)) / (
              tf.reduce_max(ca_map) - tf.reduce_min(ca_map))
          ca_map = tf.expand_dims(ca_map, 2)
          for i in range(channels):
            if i == 0:
              erase_tmp = ca_map
            else:
              erase_tmp = tf.concat([erase_tmp, ca_map], 2)
          erase_tmp = tf.expand_dims(erase_tmp, 0)
          if n == 0:
            erase = erase_tmp
          else:
            erase = tf.concat([erase, erase_tmp], 0)
        # Keep features where the normalized CAM is below delta; zero the rest.
        erased_feature = tf.where(tf.less(erase, delta), inception_c_feature,
                                  tf.zeros_like(inception_c_feature))

        aux_logits = slim.conv2d(
            erased_feature, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME', scope='conv1_3x3')
        aux_logits = slim.conv2d(
            aux_logits, 1024, [3, 3],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            padding='SAME', scope='conv2_3x3')
        aux_logits = slim.conv2d(
            aux_logits, num_classes, [1, 1], activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            scope='conv3_1x1')
        end_points['features_B'] = aux_logits
        # GAP.
        if kernel_size.is_fully_defined():
          aux_logits = slim.avg_pool2d(aux_logits, kernel_size,
                                       padding='VALID', scope='AvgPool_1a')
        else:
          aux_logits = tf.reduce_mean(aux_logits, [1, 2], keep_dims=True,
                                      name='global_pool')
        aux_logits = slim.flatten(aux_logits, scope='Flatten')
        end_points['AuxLogits'] = aux_logits
        end_points['Predictions_B'] = tf.argmax(aux_logits, 1,
                                                name='Predictions_B')
  return logits, end_points
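# The batch/channel loops in classifier B build, one element at a time, a mask
# from the predicted class's CAM. A vectorized sketch of the same computation
# (our assumption, not the original code; `build_erase_mask` is a name we
# introduce):
def build_erase_mask(features_a, predictions_a, num_classes, channels, delta):
  """True where the normalized CAM is below `delta`, i.e. where to keep."""
  onehot = tf.reshape(tf.one_hot(predictions_a, num_classes),
                      [-1, 1, 1, num_classes])
  # Select the CAM of the predicted class: [N, H, W, 1].
  cam = tf.reduce_sum(features_a * onehot, axis=3, keep_dims=True)
  lo = tf.reduce_min(cam, axis=[1, 2], keep_dims=True)
  hi = tf.reduce_max(cam, axis=[1, 2], keep_dims=True)
  cam = (cam - lo) / (hi - lo)  # per-example min-max normalization
  # TF1's tf.where does not broadcast, so tile the mask across channels.
  return tf.tile(tf.less(cam, delta), [1, 1, 1, channels])

# erased = tf.where(build_erase_mask(...), inception_c_feature,
#                   tf.zeros_like(inception_c_feature))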
# Despite its name, this function builds an Inception V4 subgraph.
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV4"):
  """Builds an Inception V4 subgraph for image embeddings.

  Args:
    images: A float32 Tensor of shape [batch, height, width, channels].
    trainable: Whether the inception submodel should be trainable or not.
    is_training: Boolean indicating training mode or not.
    weight_decay: Coefficient for weight regularization.
    stddev: The standard deviation of the truncated normal weight initializer.
    dropout_keep_prob: Dropout keep probability.
    use_batch_norm: Whether to use batch normalization.
    batch_norm_params: Parameters for batch normalization. See
      tf.contrib.layers.batch_norm for details.
    add_summaries: Whether to add activation summaries.
    scope: Optional Variable scope.

  Returns:
    net: A float32 Tensor containing the flattened image embeddings.
  """
  # Only consider the inception model to be in training mode if it's
  # trainable.
  is_inception_model_training = trainable and is_training

  if use_batch_norm:
    # Default parameters for batch normalization.
    if not batch_norm_params:
      batch_norm_params = {
          "is_training": is_inception_model_training,
          "trainable": trainable,
          # Decay for the moving averages.
          "decay": 0.9997,
          # Epsilon to prevent 0s in variance.
          "epsilon": 0.001,
          # Collection containing the moving mean and moving variance.
          "variables_collections": {
              "beta": None,
              "gamma": None,
              "moving_mean": ["moving_vars"],
              "moving_variance": ["moving_vars"],
          }
      }
  else:
    batch_norm_params = None

  if trainable:
    weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  with tf.variable_scope(scope, "InceptionV4", [images]) as scope:
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=weights_regularizer,
        trainable=trainable):
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          normalizer_params=batch_norm_params):
        net, end_points = inception_v4.inception_v4_base(
            images, scope=scope)
        with tf.variable_scope("logits"):
          shape = net.get_shape()
          net = slim.avg_pool2d(
              net, shape[1:3], padding="VALID", scope="pool")
          net = slim.dropout(
              net,
              keep_prob=dropout_keep_prob,
              is_training=is_inception_model_training,
              scope="dropout")
          net = slim.flatten(net, scope="flatten")

  # Add summaries.
  if add_summaries:
    for v in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(v)

  return net
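# Hedged usage sketch for the embedding builder above (the batch size is
# illustrative). For 299x299 inputs the Mixed_7d activation is 8x8x1536, so
# the flattened embedding has depth 1536:
#
#   images = tf.placeholder(tf.float32, [32, 299, 299, 3])
#   embeddings = inception_v3(images, trainable=False, is_training=False)
#   # embeddings: [32, 1536]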
def build_model_graph(self, inputs, mode):
  if mode != self.MODE.TEST:
    image_batch, target_batch, large_target_batch = inputs
  else:
    image_batch, tid_batch = inputs

  with tf.name_scope('model') as scope:
    pad = self.config['receptive_field_size'] - self.config['contextual_pad']
    image_batch = tf.pad(image_batch,
                         [[0, 0], [pad, pad], [pad, pad], [0, 0]])

    def define_loss(preds, target):
      loss = None
      if self.config['loss_function'] == 'l17':
        loss = tf.reduce_mean(tf.abs(target - preds)**1.7)
      elif self.config['loss_function'] == 'l2':
        loss = tf.reduce_mean((target - preds)**2)
      else:
        raise ValueError('Loss function %s is not supported.' %
                         self.config['loss_function'])
      tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
      return loss

    batch_norm_params = {
        'decay': self.config['batch_norm_decay'],
        'epsilon': self.config['batch_norm_epsilon'],
        'is_training': (mode == self.MODE.TRAIN),
    }

    with slim.arg_scope([slim.conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
      with slim.arg_scope([slim.batch_norm, slim.dropout],
                          is_training=(mode == self.MODE.TRAIN)):
        # Instantiate Inception-v4 up to and including Mixed_5a.
        net, end_points = inception_v4.inception_v4_base(
            image_batch, final_endpoint='Mixed_5a')

        # 3 x Inception-A blocks, corresponding to Mixed_5b, Mixed_5c,
        # Mixed_5d.
        for idx in range(3):
          block_scope = 'Mixed_5' + chr(ord('b') + idx)
          net = inception_v4.block_inception_a(net, block_scope)

        net = slim.conv2d(net, 96, [1, 1], scope='Conv2d1_1x1',
                          padding='SAME')
        net = slim.conv2d(net, 96, [3, 3], scope='Conv2d2_3x3',
                          padding='SAME')
        preds = slim.conv2d(net, self.config['cls_nb'], [3, 3],
                            scope='Conv2d3_3x3_preds', padding='SAME',
                            activation_fn=None, normalizer_fn=None)

    if mode == self.MODE.TRAIN:
      define_loss(preds, target_batch)

    # Debugging
    self.debug_sequence = []
    if mode != self.MODE.TRAIN:
      self.debug_sequence.append(preds)

    def recurrence(features, hidden, preds):
      preds = tf.stop_gradient(preds)
      with tf.name_scope(scope, 'Recurrence', [features, hidden, preds]):
        embed_pad = self.config['target_embedding_pad']
        padded_preds = tf.pad(preds, [[0, 0], [embed_pad, embed_pad],
                                      [embed_pad, embed_pad], [0, 0]])
        if mode == self.MODE.TRAIN:
          padded_preds = large_target_batch + padded_preds
        net = padded_preds
        with slim.arg_scope([slim.conv2d],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params):
          with slim.arg_scope([slim.batch_norm, slim.dropout],
                              is_training=(mode == self.MODE.TRAIN)):
            # Average pooling to blur the squares to make ground truth look
            # more like the predictions.
            # net = slim.avg_pool2d(net, 16, [3, 3], 1, scope='AvgPool',
            #                       padding='VALID')
            net = slim.conv2d(net, 16, [3, 3], rate=1, scope='Dilated0',
                              padding='VALID')
            net = slim.conv2d(net, 16, [3, 3], rate=2, scope='Dilated1',
                              padding='VALID')
            net = slim.conv2d(net, 16, [3, 3], rate=3, scope='Dilated2',
                              padding='VALID')
            net = slim.conv2d(net, 16, [3, 3], rate=4, scope='Dilated3',
                              padding='VALID')
            net = slim.conv2d(net, 32, [3, 3], rate=5, scope='Dilated4',
                              padding='VALID')
            net = slim.conv2d(net, 32, [3, 3], rate=1, scope='Dilated5',
                              padding='VALID')

            # 32 + 384 + 384 + 6
            net = tf.concat([net, hidden, features, preds], axis=3)

            # 3 x Inception-A blocks, corresponding to Mixed_5b, Mixed_5c,
            # Mixed_5d.
            for idx in range(3):
              block_scope = 'Mixed_5' + chr(ord('b') + idx)
              net = inception_v4.block_inception_a(net, block_scope)

            hidden = slim.conv2d(net, self.config['hidden_state_size'],
                                 [1, 1], scope='Conv2d0_1x1', padding='SAME')
            net = slim.conv2d(net, 96, [1, 1], scope='Conv2d1_1x1',
                              padding='SAME')
            net = slim.conv2d(net, 96, [3, 3], scope='Conv2d2_3x3',
                              padding='SAME')
            preds = slim.conv2d(net, self.config['cls_nb'], [3, 3],
                                scope='Conv2d3_3x3_preds', padding='SAME',
                                activation_fn=None, normalizer_fn=None)

        if mode != self.MODE.TRAIN:
          self.debug_sequence.append(preds)

        loss = None
        if mode == self.MODE.TRAIN:
          loss = define_loss(preds, target_batch)

      return preds, loss, hidden

    # 3 x Recurrent context blocks sharing one set of variables.
    rnn_template = tf.make_template('rnn_shared_variables', recurrence)
    hidden_shape = [dim.value for dim in end_points['Mixed_5a'].get_shape()]
    hidden_shape[-1] = self.config['hidden_state_size']
    hidden = tf.zeros(hidden_shape)
    final_loss = None
    for idx in range(3):
      preds, final_loss, hidden = rnn_template(end_points['Mixed_5a'],
                                               hidden, preds)

    if mode == self.MODE.TRAIN:
      with tf.name_scope('stats'):
        tf.summary.scalar('final_loss', final_loss,
                          collections=['train_summaries'])
        pf_area = self.config['projective_field_size']**2
        target_counts = tf.reduce_sum(target_batch / pf_area, axis=[1, 2])
        pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
        mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
        tf.summary.scalar('mae', mae, collections=['train_summaries'])

    def build_target_cache(input_shape, size, offset, equalize=True):
      max_size = 0
      output_size = 0
      sizes = np.empty((size, size, 4), dtype=np.int32)
      for y in range(size):
        input_r = utilities.Rect(y + offset, 0, y + offset, 0,
                                 input_shape[1], input_shape[2])
        r = utilities.calc_projective_field(
            image_batch.name, end_points['Mixed_5a'].name, input_r)
        sizes[y, :, 0] = r.min_y
        sizes[:, y, 1] = r.min_y
        sizes[y, :, 2] = r.max_y + 1  # For Python ranges and indices
        sizes[:, y, 3] = r.max_y + 1  # which exclude the upper bound.
        output_size = r.h
        if r.height > max_size:
          max_size = r.height

      if self.config['debug']:
        print('Projective field size: %i' % max_size)
        print('Output size: %i' % output_size)

      if equalize:
        # Make all projective fields the same size.
        for y in range(size):
          for x in range(size):
            if sizes[y, x, 2] - sizes[y, x, 0] < max_size:
              if sizes[y, x, 0] + max_size // 2 < output_size // 2:
                sizes[y, x, 2] = sizes[y, x, 0] + max_size
              else:
                sizes[y, x, 0] = sizes[y, x, 2] - max_size
            if sizes[y, x, 3] - sizes[y, x, 1] < max_size:
              if sizes[y, x, 1] + max_size // 2 < output_size // 2:
                sizes[y, x, 3] = sizes[y, x, 1] + max_size
              else:
                sizes[y, x, 1] = sizes[y, x, 3] - max_size
            if (sizes[y, x, 2] - sizes[y, x, 0] != max_size
                or sizes[y, x, 3] - sizes[y, x, 1] != max_size):
              print(sizes[y, x])
      return sizes, max_size

    self.target_sizes, self.config['projective_field_size'] = \
        build_target_cache(
            [dim.value for dim in image_batch.get_shape()],
            size=self.config['tile_size'],
            offset=self.config['receptive_field_size'],
        )
    large_input_size = (self.config['tile_size'] +
                        2 * self.config['large_contextual_pad'])
    self.target_sizes_large, _ = build_target_cache(
        [1, large_input_size, large_input_size, 1],
        size=(self.config['tile_size'] +
              2 * self.config['large_contextual_pad_unpadded']),
        offset=self.config['receptive_field_size'],
        equalize=False,  # The area content of these squares does not matter.
    )

    if mode == self.MODE.VALIDATE:
      with tf.name_scope('stats'):
        # Mean absolute error
        pf_area = self.config['projective_field_size']**2
        target_counts = tf.reduce_sum(target_batch / pf_area, axis=[1, 2])
        pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
        mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
        mae_avg = utilities.RunningAverage(
            'mae', mae, summary_args={'collections': ['stats_summaries']})

        # Accuracy
        acc = tf.reduce_mean(
            tf.cast(tf.abs(tf.reduce_mean(preds - target_batch, [1, 2])),
                    tf.float32))
        acc_avg = utilities.RunningAverage(
            'accuracy', acc,
            summary_args={'collections': ['stats_summaries']})

        self.valid_op = tf.group(mae_avg.update_op, acc_avg.update_op)
        self.stats_reset_op = tf.group(mae_avg.reset_op, acc_avg.reset_op)
        self.score = mae_avg.value
        print(self.score)

      # Debugging
      self.debug_preds = preds
      self.debug_targets = target_batch
      self.debug_large_targets = large_target_batch
      embed_pad = self.config['target_embedding_pad']
      reduced_large_target_batch = tf.reduce_sum(large_target_batch, axis=3,
                                                 keep_dims=True)
      reduced_target_batch = tf.reduce_sum(
          tf.pad(target_batch, [[0, 0], [embed_pad, embed_pad],
                                [embed_pad, embed_pad], [0, 0]]),
          axis=3, keep_dims=True)
      self.debug_combined_preds = tf.concat([
          reduced_large_target_batch, reduced_target_batch,
          tf.zeros_like(reduced_target_batch)
      ], axis=3)

    if mode != self.MODE.TRAIN:
      # Debugging
      self.debug_inputs = image_batch

    if mode == self.MODE.TEST:
      pf_area = self.config['projective_field_size']**2
      # Suppress near-zero responses before counting.
      cond = tf.greater(preds, tf.fill(tf.shape(preds), 0.05))
      preds = tf.where(cond, preds, tf.zeros(tf.shape(preds)))
      pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
      self.test_op = (pred_counts, tid_batch)
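# This build_model_graph variant reads its hyperparameters from self.config.
# The keys below are the ones it touches; every value is an illustrative
# assumption, not taken from the original project:
#
#   config = {
#       'receptive_field_size': ...,  'contextual_pad': ...,
#       'loss_function': 'l2',        # or 'l17'
#       'batch_norm_decay': 0.997,    'batch_norm_epsilon': 0.001,
#       'cls_nb': ...,                'hidden_state_size': ...,
#       'target_embedding_pad': ...,  'tile_size': ...,
#       'large_contextual_pad': ...,  'large_contextual_pad_unpadded': ...,
#       'debug': False,
#   }
#   # 'projective_field_size' is filled in by the method itself; the variant
#   # below additionally reads 'stride'.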
def build_model_graph(self, inputs, mode):
  image_batch, target_batch, tid_batch = inputs

  # The tiles are extended by a margin of 32 px. This roughly corresponds to
  # the extent by which the receptive fields in Mixed_5d extend beyond the
  # tile. This way, when the window slides toward the boundaries of the
  # image, the extended receptive field can recognize the boundaries early
  # enough such that it can correctly make the distinction whether the center
  # of an animal is inside or outside of the tile.
  with tf.name_scope('model') as scope:
    pad = self.config['receptive_field_size'] - self.config['contextual_pad']
    image_batch = tf.pad(image_batch,
                         [[0, 0], [pad, pad], [pad, pad], [0, 0]])

    batch_norm_params = {
        'decay': self.config['batch_norm_decay'],
        'epsilon': self.config['batch_norm_epsilon'],
        'is_training': (mode == self.MODE.TRAIN),
    }

    with slim.arg_scope([slim.conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
      with slim.arg_scope([slim.batch_norm, slim.dropout],
                          is_training=(mode == self.MODE.TRAIN)):
        net, end_points = inception_v4.inception_v4_base(
            image_batch, final_endpoint='Mixed_5d')
        preds = slim.conv2d(net, self.config['cls_nb'], [1, 1],
                            scope='Conv2d_1x1_preds', activation_fn=None)

    if mode == self.MODE.TRAIN:
      # Build target cache
      self.target_sizes = np.empty(
          (self.config['tile_size'], self.config['tile_size'], 4),
          dtype=np.int32)
      input_shape = [dim.value for dim in image_batch.get_shape()]
      max_size = 0
      output_size = 0
      for y in range(self.config['tile_size']):
        input_r = utilities.Rect(
            y + self.config['receptive_field_size'], 0,
            y + self.config['receptive_field_size'], 0,
            input_shape[1], input_shape[2])
        r = utilities.calc_projective_field(
            image_batch.name, end_points['Mixed_5a'].name, input_r)
        output_size = r.h
        if r.height > max_size:
          max_size = r.height
        self.target_sizes[y, :, 0] = r.min_y
        self.target_sizes[:, y, 1] = r.min_y
        # For Python ranges and indices, which exclude the upper bound.
        self.target_sizes[y, :, 2] = r.max_y + 1
        self.target_sizes[:, y, 3] = r.max_y + 1

      self.config['projective_field_size'] = max_size
      if self.config['debug']:
        print('Projective field size: %i' % max_size)
        print('Output size: %i' % output_size)

      # Make all projective fields the same size.
      for y in range(self.config['tile_size']):
        for x in range(self.config['tile_size']):
          if (self.target_sizes[y, x, 2] - self.target_sizes[y, x, 0]
              < max_size):
            if self.target_sizes[y, x, 0] + max_size // 2 < output_size // 2:
              self.target_sizes[y, x, 2] = (
                  self.target_sizes[y, x, 0] + max_size)
            else:
              self.target_sizes[y, x, 0] = (
                  self.target_sizes[y, x, 2] - max_size)
          if (self.target_sizes[y, x, 3] - self.target_sizes[y, x, 1]
              < max_size):
            if self.target_sizes[y, x, 1] + max_size // 2 < output_size // 2:
              self.target_sizes[y, x, 3] = (
                  self.target_sizes[y, x, 1] + max_size)
            else:
              self.target_sizes[y, x, 1] = (
                  self.target_sizes[y, x, 3] - max_size)
          if (self.target_sizes[y, x, 2] - self.target_sizes[y, x, 0]
              != max_size
              or self.target_sizes[y, x, 3] - self.target_sizes[y, x, 1]
              != max_size):
            print(self.target_sizes[y, x])

      with tf.name_scope('training'):
        loss = tf.reduce_mean((target_batch - preds)**2)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

    pf_area = self.config['projective_field_size']**2

    if mode == self.MODE.VALIDATE:
      with tf.name_scope('stats'):
        # Mean absolute error
        pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
        target_counts = tf.reduce_sum(target_batch / pf_area, axis=[1, 2])
        mae = tf.reduce_sum(tf.abs(pred_counts - target_counts))
        mae_avg = utilities.RunningAverage(
            'mae', mae, summary_args={'collections': ['stats_summaries']})

        # Accuracy
        acc = tf.reduce_mean(
            tf.cast(tf.abs(tf.reduce_mean(preds - target_batch, [1, 2])),
                    tf.float32))
        acc_avg = utilities.RunningAverage(
            'accuracy', acc,
            summary_args={'collections': ['stats_summaries']})

        with tf.control_dependencies([mae_avg.update_op, acc_avg.update_op]):
          # All we need is the control dependencies above.
          self.valid_op = tf.no_op()

        self.stats_reset_op = tf.group(mae_avg.reset_op, acc_avg.reset_op)
        self.score = mae_avg.value

      self.debug_preds = preds
      self.debug_inputs = image_batch
      self.debug_targets = target_batch

    if mode == self.MODE.TEST:
      pf_area = (self.config['projective_field_size'] /
                 self.config['stride'])**2
      pred_counts = tf.reduce_sum(preds / pf_area, axis=[1, 2])
      self.test_op = (pred_counts, tid_batch)