コード例 #1
0
    def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
        """Test creation of ASPP decoder."""
        input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

        network = decoders.ASPP(level=level,
                                dilation_rates=dilation_rates,
                                num_filters=num_filters,
                                use_sync_bn=True)

        model_config = configs.semantic_segmentation.SemanticSegmentationModel(
        )
        model_config.num_classes = 10
        model_config.input_size = [None, None, 3]
        model_config.decoder = decoders_cfg.Decoder(
            type='aspp',
            aspp=decoders_cfg.ASPP(level=level,
                                   dilation_rates=dilation_rates,
                                   num_filters=num_filters))

        factory_network = factory.build_decoder(input_specs=input_specs,
                                                model_config=model_config)

        network_config = network.get_config()
        factory_network_config = factory_network.get_config()
        # Due to calling `super().get_config()` in aspp layer, everything but the
        # the name of two layer instances are the same, so we force equal name so it
        # will not give false alarm.
        factory_network_config['name'] = network_config['name']

        self.assertEqual(network_config, factory_network_config)
コード例 #2
0
 def test_deeplabv3_builder(self, backbone_type, input_size, weight_decay):
     num_classes = 21
     input_specs = tf.keras.layers.InputSpec(
         shape=[None, input_size[0], input_size[1], 3])
     model_config = semantic_segmentation_cfg.SemanticSegmentationModel(
         num_classes=num_classes,
         backbone=backbones.Backbone(type=backbone_type,
                                     mobilenet=backbones.MobileNet(
                                         model_id='MobileNetV2',
                                         output_stride=16)),
         decoder=decoders.Decoder(type='aspp',
                                  aspp=decoders.ASPP(level=4,
                                                     num_filters=256,
                                                     dilation_rates=[],
                                                     spp_layer_version='v1',
                                                     output_tensor=True)),
         head=semantic_segmentation_cfg.SegmentationHead(
             level=4,
             low_level=2,
             num_convs=1,
             upsample_factor=2,
             use_depthwise_convolution=True))
     l2_regularizer = (tf.keras.regularizers.l2(weight_decay)
                       if weight_decay else None)
     model = factory.build_segmentation_model(input_specs=input_specs,
                                              model_config=model_config,
                                              l2_regularizer=l2_regularizer)
     quantization_config = common.Quantization()
     _ = qat_factory.build_qat_segmentation_model(
         model=model,
         quantization=quantization_config,
         input_specs=input_specs)
コード例 #3
0
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
    """Image segmentation on cityscapes with mobilenetv2 deeplabv3."""
    train_batch_size = 16
    eval_batch_size = 16
    steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = []
    pool_kernel_size = [512, 1024]

    level = int(np.math.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                # Cityscapes uses only 19 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=19,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(type='mobilenet',
                                            mobilenet=backbones.MobileNet(
                                                model_id='MobileNetV2',
                                                output_stride=output_stride)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       pool_kernel_size=pool_kernel_size)),
                head=SegmentationHead(level=level, num_convs=0),
                norm_activation=common.NormActivation(activation='relu',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=4e-5),
            train_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'),
                                  crop_size=[512, 1024],
                                  output_size=[1024, 2048],
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.5,
                                  aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
                                       output_size=[1024, 2048],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=True,
                                       drop_remainder=False),
            # Coco pre-trained mobilenetv2 checkpoint
            init_checkpoint=
            'gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=100000,
            validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            best_checkpoint_eval_metric='mean_iou',
            best_checkpoint_export_subdir='best_ckpt',
            best_checkpoint_metric_comp='higher',
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.01,
                        'decay_steps': 100000,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
コード例 #4
0
def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
    """Image segmentation on cityscapes with resnet deeplabv3+."""
    train_batch_size = 16
    eval_batch_size = 16
    steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                # Cityscapes uses only 19 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=19,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        stem_type=stem_type,
                        multigrid=multigrid)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       pool_kernel_size=[512, 1024])),
                head=SegmentationHead(level=level,
                                      num_convs=2,
                                      feature_fusion='deeplabv3plus',
                                      low_level=2,
                                      low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'),
                                  crop_size=[512, 1024],
                                  output_size=[1024, 2048],
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.5,
                                  aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
                                       output_size=[1024, 2048],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=True,
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=500 * steps_per_epoch,
            validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.01,
                        'decay_steps': 500 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
コード例 #5
0
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
    """Image segmentation on pascal voc with resnet deeplabv3."""
    train_batch_size = 16
    eval_batch_size = 8
    steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = [12, 24, 36]  # [6, 12, 18] if output_stride = 16
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                num_classes=21,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        stem_type=stem_type)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates)),
                head=SegmentationHead(level=level, num_convs=0),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.9997,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(
                input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
                # TODO(arashwan): test changing size to 513 to match deeplab.
                output_size=[512, 512],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'val*'),
                                       output_size=[512, 512],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=False,
                                       groundtruth_padded_size=[512, 512],
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=45 * steps_per_epoch,
            validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.007,
                        'decay_steps': 45 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
コード例 #6
0
def seg_deeplabv3plus_ade20k_32(backbone: str,
                                init_backbone: bool = True
                                ) -> cfg.ExperimentConfig:
    """Semantic segmentation on ADE20K dataset with deeplabv3+."""
    epochs = 200
    train_batch_size = 128
    eval_batch_size = 32
    image_size = 512
    steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
    aspp_dilation_rates = [5, 10, 15]
    pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
        backbone] if init_backbone else None
    config = cfg.ExperimentConfig(
        task=CustomSemanticSegmentationTaskConfig(
            model=base_cfg.SemanticSegmentationModel(
                # ADE20K uses only 32 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=32,
                input_size=[None, None, 3],
                backbone=Backbone(
                    type='mobilenet_edgetpu',
                    mobilenet_edgetpu=MobileNetEdgeTPU(
                        model_id=backbone,
                        pretrained_checkpoint_path=pretrained_checkpoint_path,
                        freeze_large_filters=500,
                    )),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(
                        level=BACKBONE_HEADPOINT[backbone],
                        use_depthwise_convolution=True,
                        dilation_rates=aspp_dilation_rates,
                        pool_kernel_size=[256, 256],
                        num_filters=128,
                        dropout_rate=0.3,
                    )),
                head=base_cfg.SegmentationHead(
                    level=BACKBONE_HEADPOINT[backbone],
                    num_convs=2,
                    num_filters=256,
                    use_depthwise_convolution=True,
                    feature_fusion='deeplabv3plus',
                    low_level=BACKBONE_LOWER_FEATURES[backbone],
                    low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='relu',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=2e-3,
                                                      use_sync_bn=False)),
            train_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
                output_size=[image_size, image_size],
                is_training=True,
                global_batch_size=train_batch_size),
            validation_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
                output_size=[image_size, image_size],
                is_training=False,
                global_batch_size=eval_batch_size,
                resize_eval_groundtruth=True,
                drop_remainder=False),
            evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
        ),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=epochs * steps_per_epoch,
            validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0001,
                        'decay_steps': epochs * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 4 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
コード例 #7
0
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Deeplab."""
    train_steps = 200000
    train_batch_size = 64
    eval_batch_size = 1
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    num_panoptic_categories = 201
    num_thing_categories = 91
    ignore_label = 0

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(True if idx <= num_thing_categories else False)

    input_size = [640, 640, 3]
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16',
                                  enable_xla=True),
        task=PanopticDeeplabTask(
            init_checkpoint=
            'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticDeeplab(
                num_classes=num_panoptic_categories,
                input_size=input_size,
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=50,
                        stem_type=stem_type,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        se_ratio=0.25,
                        last_stage_repeats=1,
                        stochastic_depth_drop_rate=0.2)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       num_filters=256,
                                       pool_kernel_size=input_size[:2],
                                       dilation_rates=aspp_dilation_rates,
                                       use_depthwise_convolution=True,
                                       dropout_rate=0.1)),
                semantic_head=SemanticHead(level=level,
                                           num_convs=1,
                                           num_filters=256,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[64, 32],
                                           fusion_num_output_filters=256,
                                           prediction_kernel_size=1),
                instance_head=InstanceHead(level=level,
                                           num_convs=1,
                                           num_filters=32,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[32, 16],
                                           fusion_num_output_filters=128,
                                           prediction_kernel_size=1),
                shared_decoder=False,
                generate_panoptic_masks=True,
                post_processor=PanopticDeeplabPostProcessor(
                    output_size=input_size[:2],
                    center_score_threshold=0.1,
                    thing_class_ids=list(range(1, num_thing_categories)),
                    label_divisor=256,
                    stuff_area_limit=4096,
                    ignore_label=ignore_label,
                    nms_kernel=41,
                    keep_k_centers=200,
                    rescale_predictions=True)),
            losses=Losses(label_smoothing=0.0,
                          ignore_label=ignore_label,
                          l2_weight_decay=0.0,
                          top_k_percent_pixels=0.2,
                          segmentation_loss_weight=1.0,
                          center_heatmap_loss_weight=200,
                          center_offset_loss_weight=0.01),
            train_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(
                    aug_scale_min=0.5,
                    aug_scale_max=1.5,
                    aug_rand_hflip=True,
                    aug_type=common.Augmentation(
                        type='autoaug',
                        autoaug=common.AutoAugment(
                            augmentation_name='panoptic_deeplab_policy')),
                    sigma=8.0,
                    small_instance_area_threshold=4096,
                    small_instance_weight=3.0)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(resize_eval_groundtruth=False,
                              groundtruth_padded_size=[640, 640],
                              aug_scale_min=1.0,
                              aug_scale_max=1.0,
                              aug_rand_hflip=False,
                              aug_type=None,
                              sigma=8.0,
                              small_instance_area_threshold=4096,
                              small_instance_weight=3.0),
                drop_remainder=False),
            evaluation=Evaluation(ignored_label=ignore_label,
                                  max_instances_per_category=256,
                                  offset=256 * 256 * 256,
                                  is_thing=is_thing,
                                  rescale_predictions=True,
                                  report_per_class_pq=False,
                                  report_per_class_iou=False,
                                  report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0005,
                        'decay_steps': train_steps,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config