Example #1
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
    """Image segmentation on pascal voc with resnet deeplabv3."""
    train_batch_size = 16
    eval_batch_size = 8
    steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = [12, 24, 36]  # DeepLab convention: [6, 12, 18] for output_stride = 16, [12, 24, 36] for output_stride = 8.
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.log2(output_stride))  # Pyramid level 4 for output_stride = 16.
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                num_classes=21,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        stem_type=stem_type)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates)),
                head=SegmentationHead(level=level, num_convs=0),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.9997,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(
                input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
                # TODO(arashwan): test changing size to 513 to match deeplab.
                output_size=[512, 512],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'val*'),
                                       output_size=[512, 512],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=False,
                                       groundtruth_padded_size=[512, 512],
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=45 * steps_per_epoch,
            validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.007,
                        'decay_steps': 45 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
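
In the TF Model Garden such factories are usually registered with exp_factory so an experiment can be selected by name and tweaked before training. The following is only a consumption sketch, assuming the standard official.core.exp_factory API and the conventional experiment name 'seg_deeplabv3_pascal'; the batch-size override is purely illustrative.

from official.core import exp_factory

# Upstream, the factory carries a decorator along the lines of
#   @exp_factory.register_config_factory('seg_deeplabv3_pascal')
# which makes the config retrievable by name.
experiment_config = exp_factory.get_exp_config('seg_deeplabv3_pascal')
experiment_config.override({'task': {'train_data': {'global_batch_size': 32}}})
experiment_config.validate()  # re-checks the `restrictions` declared above
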
Example #2
def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
    """Image segmentation on cityscapes with resnet deeplabv3+."""
    train_batch_size = 16
    eval_batch_size = 16
    steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.log2(output_stride))  # Pyramid level 4 for output_stride = 16.
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                # Cityscapes uses only 19 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=19,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        stem_type=stem_type,
                        multigrid=multigrid)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       pool_kernel_size=[512, 1024])),
                head=SegmentationHead(level=level,
                                      num_convs=2,
                                      feature_fusion='deeplabv3plus',
                                      low_level=2,
                                      low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'),
                                  crop_size=[512, 1024],
                                  output_size=[1024, 2048],
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.5,
                                  aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
                                       output_size=[1024, 2048],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=True,
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=500 * steps_per_epoch,
            validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.01,
                        'decay_steps': 500 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
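
A config like this one is normally handed to the Model Garden training driver. The sketch below shows one common pattern, assuming the official.core.task_factory and official.core.train_lib APIs; the model_dir path is purely illustrative.

import tensorflow as tf
from official.core import task_factory
from official.core import train_lib

params = seg_deeplabv3plus_cityscapes()
params.validate()

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # Builds the SemanticSegmentationTask declared in the config.
    task = task_factory.get_task(params.task, logging_dir='/tmp/seg_cityscapes')

train_lib.run_experiment(distribution_strategy=strategy,
                         task=task,
                         mode='train_and_eval',
                         params=params,
                         model_dir='/tmp/seg_cityscapes')
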
Example #3
def panoptic_deeplab_resnet_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Deeplab."""
    train_steps = 200000
    train_batch_size = 64
    eval_batch_size = 1
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    num_panoptic_categories = 201
    num_thing_categories = 91
    ignore_label = 0

    # is_thing[i] is True for "thing" categories and False for "stuff" and the
    # void class at index 0.
    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(idx <= num_thing_categories)

    input_size = [640, 640, 3]
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.log2(output_stride))  # Pyramid level 4 for output_stride = 16.

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16',
                                  enable_xla=True),
        task=PanopticDeeplabTask(
            init_checkpoint=
            'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticDeeplab(
                num_classes=num_panoptic_categories,
                input_size=input_size,
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=50,
                        stem_type=stem_type,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        se_ratio=0.25,
                        last_stage_repeats=1,
                        stochastic_depth_drop_rate=0.2)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       num_filters=256,
                                       pool_kernel_size=input_size[:2],
                                       dilation_rates=aspp_dilation_rates,
                                       use_depthwise_convolution=True,
                                       dropout_rate=0.1)),
                semantic_head=SemanticHead(level=level,
                                           num_convs=1,
                                           num_filters=256,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[64, 32],
                                           fusion_num_output_filters=256,
                                           prediction_kernel_size=1),
                instance_head=InstanceHead(level=level,
                                           num_convs=1,
                                           num_filters=32,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[32, 16],
                                           fusion_num_output_filters=128,
                                           prediction_kernel_size=1),
                shared_decoder=False,
                generate_panoptic_masks=True,
                post_processor=PanopticDeeplabPostProcessor(
                    output_size=input_size[:2],
                    center_score_threshold=0.1,
                    thing_class_ids=list(range(1, num_thing_categories)),
                    label_divisor=256,
                    stuff_area_limit=4096,
                    ignore_label=ignore_label,
                    nms_kernel=41,
                    keep_k_centers=200,
                    rescale_predictions=True)),
            losses=Losses(label_smoothing=0.0,
                          ignore_label=ignore_label,
                          l2_weight_decay=0.0,
                          top_k_percent_pixels=0.2,
                          segmentation_loss_weight=1.0,
                          center_heatmap_loss_weight=200,
                          center_offset_loss_weight=0.01),
            train_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(
                    aug_scale_min=0.5,
                    aug_scale_max=1.5,
                    aug_rand_hflip=True,
                    aug_type=common.Augmentation(
                        type='autoaug',
                        autoaug=common.AutoAugment(
                            augmentation_name='panoptic_deeplab_policy')),
                    sigma=8.0,
                    small_instance_area_threshold=4096,
                    small_instance_weight=3.0)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(resize_eval_groundtruth=False,
                              groundtruth_padded_size=[640, 640],
                              aug_scale_min=1.0,
                              aug_scale_max=1.0,
                              aug_rand_hflip=False,
                              aug_type=None,
                              sigma=8.0,
                              small_instance_area_threshold=4096,
                              small_instance_weight=3.0),
                drop_remainder=False),
            evaluation=Evaluation(ignored_label=ignore_label,
                                  max_instances_per_category=256,
                                  offset=256 * 256 * 256,
                                  is_thing=is_thing,
                                  rescale_predictions=True,
                                  report_per_class_pq=False,
                                  report_per_class_iou=False,
                                  report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0005,
                        'decay_steps': train_steps,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
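
Because several values above are derived from one another (the ASPP level from the output stride, the is_thing list from the category counts, the decay schedule from train_steps), a quick consistency check after building the config can catch accidental drift. This is only a sketch; the attribute paths follow the config dataclasses shown above and are assumptions to that extent.

config = panoptic_deeplab_resnet_coco()

# 201 panoptic categories: index 0 is the void/ignore label, 1..91 are things.
assert len(config.task.evaluation.is_thing) == config.task.model.num_classes

# ASPP level 4 matches the backbone output stride of 16 (2 ** 4).
assert 2 ** config.task.model.decoder.aspp.level == 16

# The polynomial schedule decays over exactly the configured number of steps.
assert (config.trainer.optimizer_config.learning_rate.polynomial.decay_steps
        == config.trainer.train_steps)
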