def test_correct_gradients(self):
    tensor = tf.constant([
        [0., 0, 1, 0., 0.],
        [0., 0, 1, 0., 0.],
    ])[None, :, :, None]
    out = gscnn_layers.gradient_mag(tensor)
    should_out = tf.constant([
        [0., 1, 0, 1., 0.],
        [0., 1, 0, 1., 0.],
    ])[None, :, :, None]
    self.assertEqual(out.get_shape(), tf.TensorShape([1, 2, 5, 1]))
    self.assertAllClose(out, should_out)

    tensor = tf.constant([
        [0., 0.],
        [0., 0.],
        [1., 1.],
        [0., 0.],
        [0., 0.],
    ])[None, :, :, None]
    out = gscnn_layers.gradient_mag(tensor)
    should_out = tf.constant([
        [0., 0.],
        [1., 1.],
        [0., 0.],
        [1., 1.],
        [0., 0.],
    ])[None, :, :, None]
    self.assertEqual(out.get_shape(), tf.TensorShape([1, 5, 2, 1]))
    self.assertAllClose(out, should_out)
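The implementation of gscnn_layers.gradient_mag is not shown here, but the test pins its behaviour down: a unit step produces gradient magnitude 1 at both neighbouring pixels, which implies per-image normalisation on top of a standard edge filter. A minimal sketch consistent with the test, assuming Sobel filters and an illustrative eps/from_rgb handling (not necessarily the repository's actual code):

import tensorflow as tf

def gradient_mag(tensor, from_rgb=False):
    """Per-channel gradient magnitude, normalised so the strongest edge is 1."""
    if from_rgb:
        # collapse colour before taking gradients (assumed convenience path)
        tensor = tf.image.rgb_to_grayscale(tensor[..., :3])
    # tf.image.sobel_edges returns [b, h, w, c, 2] holding (dy, dx)
    edges = tf.image.sobel_edges(tensor)
    mag = tf.sqrt(tf.reduce_sum(edges ** 2, axis=-1))
    # normalise per image, matching the 0/1 targets in the test above
    mag_max = tf.reduce_max(mag, axis=[1, 2, 3], keepdims=True)
    return tf.math.divide_no_nan(mag, mag_max)

With Sobel kernels and reflect padding, the [0, 0, 1, 0, 0] rows give raw magnitudes [0, 4, 0, 4, 0]; dividing by the per-image maximum yields exactly the [0, 1, 0, 1, 0] the test expects.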
def _segmentation_edge_loss(gt_tensor, logit_tensor, thresh=0.8):
    """
    :param gt_tensor: [b, h, w, c] segmentation labels
    :param logit_tensor: [b, h, w, c] segmentation logits
    :param thresh: intensity to be considered edge
    :return: the difference in boundaries between predicted versus actual,
        where the boundaries come from the segmentation rather than the
        shape head
    """
    # soft approximation to argmax, so we can build an edge
    logit_tensor = _gumbel_softmax(logit_tensor)

    # normalised image gradients to give us edges
    # images will be [b, h, w, n_classes]
    gt_edges = gradient_mag(gt_tensor)
    pred_edges = gradient_mag(logit_tensor)

    # [b*h*w, n]
    gt_edges = tf.reshape(gt_edges, [-1, tf.shape(gt_edges)[-1]])
    pred_edges = tf.reshape(pred_edges, [-1, tf.shape(pred_edges)[-1]])

    # take the difference between these two gradient magnitudes:
    # first averaged over the edges from the ground truth image,
    # then averaged over the edges from the prediction
    edge_difference = tf.abs(gt_edges - pred_edges)

    # gt edges and their disagreement with the prediction;
    # boolean_mask requires a bool mask, so keep the comparison result
    mask_gt = gt_edges > thresh ** 2
    contrib_0 = tf.boolean_mask(edge_difference, mask_gt)
    contrib_0 = tf.cond(
        tf.greater(tf.size(contrib_0), 0),
        lambda: tf.reduce_mean(contrib_0),
        lambda: 0.)

    # vice versa; stop_gradient so the predicted mask is treated as constant.
    # mask first, then guard against an empty selection before averaging,
    # mirroring contrib_0 (averaging before the guard would yield nan on
    # an empty mask)
    mask_pred = tf.stop_gradient(pred_edges > thresh ** 2)
    contrib_1 = tf.boolean_mask(edge_difference, mask_pred)
    contrib_1 = tf.cond(
        tf.greater(tf.size(contrib_1), 0),
        lambda: tf.reduce_mean(contrib_1),
        lambda: 0.)

    return 0.5 * contrib_0 + 0.5 * contrib_1
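The _gumbel_softmax helper is not shown either; the comment tells us what it is for, namely a differentiable stand-in for argmax so that gradients can flow through the edge extraction. A minimal sketch of the standard Gumbel-softmax, where the temperature tau and the stability eps are illustrative defaults, not values taken from the source:

def _gumbel_softmax(logits, eps=1e-8, tau=1.0):
    # Gumbel(0, 1) noise: -log(-log(U)), U ~ Uniform(0, 1)
    u = tf.random.uniform(tf.shape(logits), minval=0., maxval=1.)
    gumbel = -tf.math.log(-tf.math.log(u + eps) + eps)
    # temperature-scaled softmax over classes: a differentiable
    # approximation to a one-hot argmax of the logits
    return tf.nn.softmax((logits + gumbel) / tau, axis=-1)

Lower tau pushes the output closer to a hard one-hot vector, sharpening the class boundaries that gradient_mag then turns into edges.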
def call(self, inputs, training=None, mask=None):
    # we need to repeat the input if the batch size is 1,
    # because in training mode a batch size of 1 will create
    # nans, see:
    # https://github.com/tensorflow/tensorflow/issues/34062
    one_item_batch = tf.equal(tf.shape(inputs)[0], 1)
    if training is None:
        training = True
    inputs = tf.cond(
        tf.logical_and(one_item_batch, training),
        lambda: tf.tile(inputs, (2, 1, 1, 1)),
        lambda: inputs)

    # backbone
    input_shape = tf.shape(inputs)
    target_shape = tf.stack([input_shape[1], input_shape[2]])
    backbone_feature_dict = self.backbone(inputs, training=training)
    s1, s2, s3, s4 = (backbone_feature_dict['s1'],
                      backbone_feature_dict['s2'],
                      backbone_feature_dict['s3'],
                      backbone_feature_dict['s4'])
    backbone_features = [s1, s2, s3, s4]

    # edge stream
    edge = gradient_mag(inputs, from_rgb=True)
    shape_activations, edge_out = self.shape_stream(
        [backbone_features, edge], training=training)

    # aspp
    backbone_activations = backbone_features[-1]
    intermediate_rep = backbone_features[1]
    net = self.atrous_pooling(
        [backbone_activations, shape_activations, intermediate_rep],
        training=training)

    # classify pixels
    net = self.logit_layer(net, training=training)
    net = tf.image.resize(net, target_shape)
    shape_activations = tf.image.resize(shape_activations, target_shape)

    out = tf.concat([net, shape_activations], axis=-1)
    # undo the batch-of-1 tiling from above
    out = tf.cond(one_item_batch, lambda: out[:1], lambda: out)
    return out
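Because the forward pass concatenates the resized segmentation logits with the shape-stream activations along the last axis, callers have to split the output back into its two heads. A hypothetical usage sketch, where the class name GSCNN, the n_classes constructor argument, and the single-channel shape head are all assumptions for illustration:

# hypothetical names: GSCNN and n_classes are assumptions, not the source's API
model = GSCNN(n_classes=19)
images = tf.random.uniform([4, 512, 512, 3])
out = model(images, training=True)
# split the concatenated output back into its two heads
seg_logits = out[..., :-1]   # [4, 512, 512, n_classes]
shape_head = out[..., -1:]   # [4, 512, 512, 1], assuming a 1-channel shape stream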