Beispiel #1
0
    def __init__(self,
                 num_classes,
                 train_dir='/tmp/model/train',
                 summary_log_freq=100):
        """Sets up a semantic segmentation model backed by a sparse voxel UNet.

        Registers one loss, 'semantic_loss' (weight 1.0), with the base class
        and builds a `SparseConvUNet` with a single output task whose number
        of channels equals `num_classes`.

        Args:
          num_classes: An int, the number of semantic classes to predict
            logits for.
          train_dir: A directory path to write tensorboard summary for losses
            (forwarded to the base class).
          summary_log_freq: An int, the frequency (in batches) to log summary
            (forwarded to the base class).

        Note:
          The previous docstring described the model output as a dictionary
          with one predicted tensor per task, of size
          [batch_size, num_voxels, num_task_channels]. That is a property of
          calling the model, not of this constructor -- TODO confirm against
          the call implementation.
        """
        loss_functions = {
            'semantic_loss': classification_losses.classification_loss,
        }
        loss_weights = {'semantic_loss': 1.0}
        super().__init__(loss_names_to_functions=loss_functions,
                         loss_names_to_weights=loss_weights,
                         train_dir=train_dir,
                         summary_log_freq=summary_log_freq)

        # The only prediction head: per-voxel semantic logits.
        semantic_task = (
            standard_fields.DetectionResultFields.object_semantic_voxels)
        output_channels = {semantic_task: num_classes}

        self.num_classes = num_classes
        self.sparse_conv_unet = sparse_voxel_unet.SparseConvUNet(
            task_names_to_num_output_channels=output_channels)
Beispiel #2
0
  def test_sparse_voxel_unet(self):
    """Runs SparseConvUNet on one batch of 4 voxels and checks output shape."""
    net = sparse_voxel_unet.SparseConvUNet(
        task_names_to_num_output_channels={'feature': 64})

    # One batch element holding four voxels with 3-channel features.
    features = tf.constant(
        [[[1.0, 0.0, 0.0],
          [0.0, 1.0, 0.0],
          [0.0, 0.0, 1.0],
          [1.0, 2.0, 3.0]]],
        dtype=tf.float32)
    xyz_indices = tf.constant(
        [[[0, 0, 0],
          [0, 1, 0],
          [1, 0, 0],
          [0, 0, 1]]],
        dtype=tf.int32)
    valid_counts = tf.constant([4], dtype=tf.int32)

    inputs = [features, xyz_indices, valid_counts]
    predictions = net(inputs, training=True)

    # The 'feature' head was requested with 64 output channels.
    self.assertAllEqual(predictions['feature'].shape, [1, 4, 64])
    def __init__(self,
                 num_stacked_networks=1,
                 task_names_to_num_output_channels=None,
                 task_names_to_use_relu_last_conv=None,
                 task_names_to_use_batch_norm_in_last_layer=None,
                 conv_filter_size=3,
                 encoder_dimensions=((32, 64), (64, 128), (128, 256)),
                 bottleneck_dimensions=(256, 256),
                 decoder_dimensions=((256, 256), (128, 128), (64, 64)),
                 dropout_prob=0.0,
                 use_batch_norm=True,
                 network_pooling_segment_func=tf.math.unsorted_segment_max,
                 normalize_sparse_conv=True):
        """Builds a stack of 3D UNet sparse voxel networks.

        Creates `num_stacked_networks` `SparseConvUNet` instances and stores
        them in `self.networks`. Every network except the last gets a single
        'intermediate_output' head (with relu) whose channel count is the last
        entry of the last decoder block; the last network gets the
        caller-supplied task heads.

        Args:
          num_stacked_networks: Number of stacked networks that build the
            hour-glass structure.
          task_names_to_num_output_channels: A dictionary mapping task names
            to the number of prediction channels for each task.
          task_names_to_use_relu_last_conv: A dictionary mapping task names to
            whether relu should be applied at the last convolution. If None,
            relu is disabled for every task.
          task_names_to_use_batch_norm_in_last_layer: A dictionary mapping
            task names to whether batch norm is applied to the last
            convolution of the task. If None, it is disabled for every task.
          conv_filter_size: The 3d convolution filter size. Currently the 3d
            convolution op is optimized for a filter size of 3.
          encoder_dimensions: A tuple of tuples, where each nested tuple lists
            the output feature dimensionality of each 3x3x3 convolution.
            After every nested tuple a 2x2x2 3D max pooling is done.
          bottleneck_dimensions: A tuple of ints describing the output feature
            dimensionality of each 3x3x3 convolution in the middle of the
            network, after downsampling and before upsampling.
          decoder_dimensions: A tuple of tuples, where each nested tuple lists
            the output feature dimensionality of each 3x3x3 convolution.
            Before every new nested tuple a 2x2x2 upsampling is done, followed
            by concatenation of encoder features in a UNet fashion.
          dropout_prob: A float indicating the probability of dropout.
          use_batch_norm: Whether to use batch normalization or not. Also
            used as the batch-norm setting of the intermediate heads.
          network_pooling_segment_func: Function used to pool voxel features
            in the network.
          normalize_sparse_conv: If True, applies normalization to 3d sparse
            convs.

        Raises:
          ValueError: If task_names_to_num_output_channels is None.
          ValueError: If the encoder and decoder have a different number of
            downsampling/upsampling levels.

        Note:
          The previous docstring described the model output as a dictionary
          with one predicted tensor per task, of size
          [batch_size, num_voxels, num_task_channels] -- TODO confirm against
          the call implementation, which is not part of this constructor.
        """
        super().__init__()

        if task_names_to_num_output_channels is None:
            raise ValueError(
                'task_names_to_num_output_channels cannot be None')

        if len(encoder_dimensions) != len(decoder_dimensions):
            raise ValueError(
                'The number of encoder and decoder blocks should be equal')

        # Missing per-task switches default to "off" for every task, with keys
        # inserted in sorted order.
        if task_names_to_use_relu_last_conv is None:
            task_names_to_use_relu_last_conv = dict.fromkeys(
                sorted(task_names_to_num_output_channels), False)

        if task_names_to_use_batch_norm_in_last_layer is None:
            task_names_to_use_batch_norm_in_last_layer = dict.fromkeys(
                sorted(task_names_to_num_output_channels), False)

        self.num_stacked_networks = num_stacked_networks

        # Three inputs: float features, int32 xyz indices (last dim 3) and a
        # rank-1 int32 tensor.
        features_spec = tf.keras.layers.InputSpec(shape=(None, None, None),
                                                  dtype=tf.float32)
        indices_spec = tf.keras.layers.InputSpec(shape=(None, None, 3),
                                                 dtype=tf.int32)
        counts_spec = tf.keras.layers.InputSpec(shape=(None, ),
                                                dtype=tf.int32)
        self.input_spec = [features_spec, indices_spec, counts_spec]

        self.networks = []
        intermediate_channels = decoder_dimensions[-1][-1]

        # Arguments that are identical for every network in the stack.
        shared_unet_kwargs = dict(
            conv_filter_size=conv_filter_size,
            encoder_dimensions=encoder_dimensions,
            bottleneck_dimensions=bottleneck_dimensions,
            decoder_dimensions=decoder_dimensions,
            dropout_prob=dropout_prob,
            use_batch_norm=use_batch_norm,
            network_pooling_segment_func=network_pooling_segment_func,
            normalize_sparse_conv=normalize_sparse_conv)

        final_stage = num_stacked_networks - 1
        for stage in range(num_stacked_networks):
            if stage != final_stage:
                # Non-final stages emit one intermediate feature head.
                head_channels = {'intermediate_output': intermediate_channels}
                head_relu = {'intermediate_output': True}
                head_batch_norm = {'intermediate_output': use_batch_norm}
            else:
                # Only the final stage produces the requested task outputs.
                head_channels = task_names_to_num_output_channels
                head_relu = task_names_to_use_relu_last_conv
                head_batch_norm = task_names_to_use_batch_norm_in_last_layer

            stage_network = sparse_voxel_unet.SparseConvUNet(
                task_names_to_num_output_channels=head_channels,
                task_names_to_use_relu_last_conv=head_relu,
                task_names_to_use_batch_norm_in_last_layer=head_batch_norm,
                **shared_unet_kwargs)
            self.networks.append(stage_network)
Beispiel #4
0
    def __init__(self,
                 num_classes,
                 loss_names_to_functions=None,
                 loss_names_to_weights=None,
                 embedding_dims=64,
                 embedding_similarity_strategy='distance',
                 embedding_similarity_threshold=0.5,
                 apply_nms=True,
                 nms_score_threshold=0.1,
                 nms_iou_threshold=0.3,
                 num_furthest_voxel_samples=1000,
                 sampler_score_vs_distance_coef=0.5,
                 train_dir='/tmp/model/train',
                 summary_log_freq=100):
        """Sets up an object detection model backed by a sparse voxel UNet.

        Forwards the loss configuration and logging options to the base
        class, stores the remaining options on the instance, and builds a
        `SparseConvUNet` with two output heads: semantic logits
        (`num_classes` channels) and instance embeddings (`embedding_dims`
        channels). Relu and batch norm are disabled on the last layer of both
        heads.

        Args:
          num_classes: An int, the number of semantic classes to predict
            logits for.
          loss_names_to_functions: A dictionary mapping loss names to loss
            functions.
          loss_names_to_weights: A dictionary mapping loss names to loss
            weights.
          embedding_dims: An int, the dimensionality of the per-voxel
            embeddings added to the outputs dictionary.
          embedding_similarity_strategy: Defines the method for computing
            similarity between embedding vectors. Possible values are
            'dotproduct' and 'distance'.
          embedding_similarity_threshold: Similarity threshold used to decide
            if two point embedding vectors belong to the same instance.
          apply_nms: If True, non-maximum suppression is applied to the final
            predictions.
          nms_score_threshold: Score threshold used in non-maximum
            suppression.
          nms_iou_threshold: Intersection over union threshold used in
            non-maximum suppression.
          num_furthest_voxel_samples: Number of voxels to be sampled using
            furthest voxel sampling in the postprocessor.
          sampler_score_vs_distance_coef: The coefficient that balances the
            weight between furthest voxel sampling and highest score sampling
            in the postprocessor.
          train_dir: A directory path to write tensorboard summary for losses.
          summary_log_freq: An int, the frequency (in batches) to log summary.

        Note:
          The previous docstring described the model output as a dictionary
          of tensors holding predicted object properties -- TODO confirm
          against the call implementation, which is not part of this
          constructor.
        """
        super().__init__(loss_names_to_functions=loss_names_to_functions,
                         loss_names_to_weights=loss_names_to_weights,
                         train_dir=train_dir,
                         summary_log_freq=summary_log_freq)

        # Model outputs.
        self.num_classes = num_classes
        self.embedding_dims = embedding_dims

        # Embedding similarity configuration.
        self.embedding_similarity_strategy = embedding_similarity_strategy
        self.embedding_similarity_threshold = embedding_similarity_threshold

        # Non-maximum suppression configuration.
        self.apply_nms = apply_nms
        self.nms_score_threshold = nms_score_threshold
        self.nms_iou_threshold = nms_iou_threshold

        # Voxel sampling configuration.
        self.num_furthest_voxel_samples = num_furthest_voxel_samples
        self.sampler_score_vs_distance_coef = sampler_score_vs_distance_coef

        result_fields = standard_fields.DetectionResultFields
        semantic_task = result_fields.object_semantic_voxels
        embedding_task = result_fields.instance_embedding_voxels

        head_channels = {
            semantic_task: num_classes,
            embedding_task: embedding_dims,
        }
        head_relu = {
            semantic_task: False,
            embedding_task: False,
        }
        # Batch norm is switched off on the last layer of every head.
        head_batch_norm = dict.fromkeys(head_channels, False)

        self.sparse_conv_unet = sparse_voxel_unet.SparseConvUNet(
            task_names_to_num_output_channels=head_channels,
            task_names_to_use_relu_last_conv=head_relu,
            task_names_to_use_batch_norm_in_last_layer=head_batch_norm)