def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                             Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
    """Runs the segmentation head on backbone and decoder endpoints.

    `inputs` is indexed as `(backbone endpoints, decoder endpoints)`. Each
    entry is either a single feature map tensor (one level) or a dictionary
    holding several levels of feature maps keyed by the level index.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
        The first is backbone endpoints, and the second is decoder endpoints.

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.

    Raises:
      ValueError: If `feature_fusion` is 'pyramid_fusion' and the decoder
        endpoints are not a dictionary.
    """
    cfg = self._config_dict
    backbone_endpoints = inputs[0]
    decoder_endpoints = inputs[1]

    def _select_level(endpoints, level_key):
        # A bare tensor is used as-is; a dictionary is indexed by the
        # configured level. The config entry is only read for dictionaries.
        if isinstance(endpoints, dict):
            return endpoints[str(cfg[level_key])]
        return endpoints

    fusion_mode = cfg['feature_fusion']
    if fusion_mode == 'deeplabv3plus':
        # deeplabv3+ feature fusion: project the low-level backbone feature,
        # resize the decoder feature to its spatial size, then concatenate.
        features = _select_level(decoder_endpoints, 'level')
        low_level = _select_level(backbone_endpoints, 'low_level')
        low_level = self._activation(
            self._dlv3p_norm(self._dlv3p_conv(low_level)))

        features = tf.image.resize(
            features,
            tf.shape(low_level)[1:3],
            method=tf.image.ResizeMethod.BILINEAR)
        features = tf.cast(features, dtype=low_level.dtype)
        features = tf.concat([features, low_level], axis=self._bn_axis)
    elif fusion_mode == 'pyramid_fusion':
        if not isinstance(decoder_endpoints, dict):
            raise ValueError('Only support dictionary decoder_output.')
        features = nn_layers.pyramid_feature_fusion(
            decoder_endpoints, cfg['level'])
    elif fusion_mode == 'panoptic_fpn_fusion':
        features = self._panoptic_fpn_fusion(decoder_endpoints)
    else:
        features = _select_level(decoder_endpoints, 'level')

    # Shared conv -> norm -> activation stack applied after any fusion mode.
    for conv_layer, norm_layer in zip(self._convs, self._norms):
        features = self._activation(norm_layer(conv_layer(features)))

    scale = cfg['upsample_factor']
    if scale > 1:
        features = spatial_transform_ops.nearest_upsampling(
            features, scale=scale)

    return self._classifier(features)
def call(self, backbone_output: Mapping[str, tf.Tensor],
         decoder_output: Mapping[str, tf.Tensor]):
    """Runs the segmentation head on multilevel backbone/decoder features.

    Args:
      backbone_output: A `dict` of tensors
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
      decoder_output: A `dict` of tensors
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.
    """
    cfg = self._config_dict
    fusion_mode = cfg['feature_fusion']

    if fusion_mode != 'deeplabv3plus' and fusion_mode == 'pyramid_fusion':
        features = nn_layers.pyramid_feature_fusion(
            decoder_output, cfg['level'])
    else:
        # Both the deeplabv3+ path and the default path start from the
        # decoder feature at the configured level.
        features = decoder_output[str(cfg['level'])]
        if fusion_mode == 'deeplabv3plus':
            # deeplabv3+ feature fusion: project the low-level backbone
            # feature, resize the decoder feature to match it spatially,
            # then concatenate the two.
            low_level = backbone_output[str(cfg['low_level'])]
            low_level = self._activation(
                self._dlv3p_norm(self._dlv3p_conv(low_level)))

            features = tf.image.resize(
                features,
                tf.shape(low_level)[1:3],
                method=tf.image.ResizeMethod.BILINEAR)
            features = tf.cast(features, dtype=low_level.dtype)
            features = tf.concat(
                [features, low_level], axis=self._bn_axis)

    # Shared conv -> norm -> activation stack applied after any fusion mode.
    for conv_layer, norm_layer in zip(self._convs, self._norms):
        features = self._activation(norm_layer(conv_layer(features)))

    scale = cfg['upsample_factor']
    if scale > 1:
        features = spatial_transform_ops.nearest_upsampling(
            features, scale=scale)

    return self._classifier(features)
def call(self, backbone_output, decoder_output):
    """Forward pass of the segmentation head.

    The fusion strategy is selected by `self._config_dict['feature_fusion']`:

    - 'deeplabv3plus': the decoder feature at `level` is resized to the
      projected backbone feature at `low_level` and concatenated with it.
    - 'pyramid_fusion': delegates to `nn_layers.pyramid_feature_fusion`.
    - 'deeplabv2': every decoder feature goes through the deeplabv2
      conv/norm/activation and the classifier; the per-level scores are
      summed and returned directly (no conv stack, no upsampling).
    - 'deeplabv1_msc': backbone levels '0'..str(low_level) and the decoder
      feature at `level` each go through their own 3x3/1x1 conv branch and
      are concatenated.
    - anything else: the decoder feature at `level` is used as-is.

    Args:
      backbone_output: A `dict` of tensors
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
      decoder_output: A `dict` of tensors
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
        This mapping is never modified by the call.

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.
    """
    cfg = self._config_dict
    fusion = cfg['feature_fusion']

    if fusion == 'deeplabv2':
        # deeplabv2 feature fusion: classify every decoder level and sum.
        # The scores are collected in a fresh list instead of being written
        # back into `decoder_output`, so the caller's dict is left intact.
        # Returning here also keeps the shared conv stack below from running
        # on a feature that was never selected for this mode.
        branch_scores = [
            self._classifier(
                self._activation(self._dlv2_norm(self._dlv2_conv(feature))))
            for feature in decoder_output.values()
        ]
        return sum(branch_scores)

    if fusion == 'deeplabv3plus':
        # deeplabv3+ feature fusion.
        x = decoder_output[str(cfg['level'])]
        y = backbone_output[str(cfg['low_level'])]
        y = self._activation(self._dlv3p_norm(self._dlv3p_conv(y)))

        x = tf.image.resize(
            x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
        x = tf.cast(x, dtype=y.dtype)
        x = tf.concat([x, y], axis=self._bn_axis)
    elif fusion == 'pyramid_fusion':
        x = nn_layers.pyramid_feature_fusion(decoder_output, cfg['level'])
    elif fusion == 'deeplabv1_msc':
        # deeplabv1 feature fusion for multi-scale prediction.

        def _msc_branch(feature, index):
            # NOTE(review): the same norm layer is applied after both the
            # 3x3 and the 1x1 conv, as in the original code - confirm that
            # sharing it between the two convs is intended.
            norm = self._dlv1_msc_norms[index]
            feature = self._activation(
                norm(self._dlv1_msc_convs33[index](feature)))
            return self._activation(
                norm(self._dlv1_msc_convs11[index](feature)))

        # Backbone levels use branches 0..low_level; the decoder feature
        # uses the next branch. The index is computed explicitly rather
        # than read from a loop variable after the loop has finished.
        num_backbone_levels = cfg['low_level'] + 1
        msc_outputs = [
            _msc_branch(backbone_output[str(idx)], idx)
            for idx in range(num_backbone_levels)
        ]
        msc_outputs.append(
            _msc_branch(decoder_output[str(cfg['level'])],
                        num_backbone_levels))
        x = tf.concat(msc_outputs, axis=self._bn_axis)
    else:
        x = decoder_output[str(cfg['level'])]

    # Shared conv -> norm -> activation stack.
    for conv, norm in zip(self._convs, self._norms):
        x = self._activation(norm(conv(x)))

    # NOTE(review): upsampling is applied unconditionally here, including
    # when upsample_factor is 1 - confirm whether a `> 1` guard is wanted.
    x = spatial_transform_ops.nearest_upsampling(
        x, scale=cfg['upsample_factor'])
    return self._classifier(x)