Example #1
@dataclasses.dataclass
class CustomSemanticSegmentationTaskConfig(base_cfg.SemanticSegmentationTask):
  """Same config for custom taks."""

  model: Optional[base_cfg.SemanticSegmentationModel] = None
  train_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=True)
  validation_data: base_cfg.DataConfig = base_cfg.DataConfig(is_training=False)
  evaluation: base_cfg.Evaluation = base_cfg.Evaluation()


@dataclasses.dataclass
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
  """Panoptic Mask R-CNN task config."""
  model: PanopticMaskRCNN = PanopticMaskRCNN()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False,
                                           drop_remainder=False)
  segmentation_evaluation: semantic_segmentation.Evaluation = semantic_segmentation.Evaluation()  # pylint: disable=line-too-long
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  segmentation_init_checkpoint: Optional[str] = None

  # 'init_checkpoint_modules' controls the modules that need to be initialized
  # from checkpoint paths given by 'init_checkpoint' and/or
  # 'segmentation_init_checkpoint'. Supports modules:
  # 'backbone': Initialize MaskRCNN backbone
  # 'segmentation_backbone': Initialize segmentation backbone
  # 'segmentation_decoder': Initialize segmentation decoder
  # 'all': Initialize all modules
  init_checkpoint_modules: Optional[List[str]] = dataclasses.field(
      default_factory=list)
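
# Illustrative only (the checkpoint paths below are placeholders, not real
# files): with the modules listed above, the detection backbone and the
# segmentation backbone could be initialized from two separate checkpoints:
#
#   task = PanopticMaskRCNNTask(
#       init_checkpoint='gs://path/to/detection_ckpt',
#       segmentation_init_checkpoint='gs://path/to/segmentation_ckpt',
#       init_checkpoint_modules=['backbone', 'segmentation_backbone'])
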
def panoptic_fpn_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Mask R-CNN."""
    train_batch_size = 64
    eval_batch_size = 8
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    # The COCO panoptic dataset has category ids ranging from 0 to 200,
    # inclusive. Id 0 is unused and represents the background class;
    # ids 1-91 represent thing categories (91 classes) and ids 92-200
    # represent stuff categories (109 classes). For the segmentation task we
    # keep id=0 for the background and map all thing categories to id=1. The
    # remaining 109 stuff categories are shifted down by an offset of 90
    # (num_thing_categories - 1), so the stuff categories start at id=2 and
    # end at id=110.
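    # Worked check of the constants below: 92 - 90 = 2 and 200 - 90 = 110, so
    # the segmentation head needs 1 (background) + 1 (things) + 109 (stuff)
    # = 111 classes.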
    num_panoptic_categories = 201
    num_thing_categories = 91
    num_semantic_segmentation_classes = 111

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(idx <= num_thing_categories)

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32',
                                  enable_xla=True),
        task=PanopticMaskRCNNTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticMaskRCNN(
                num_classes=91,
                input_size=[1024, 1024, 3],
                panoptic_segmentation_generator=PanopticSegmentationGenerator(
                    output_size=[640, 640], rescale_predictions=True),
                stuff_classes_offset=90,
                segmentation_model=SEGMENTATION_MODEL(
                    num_classes=num_semantic_segmentation_classes,
                    head=SEGMENTATION_HEAD(
                        level=2,
                        num_convs=0,
                        num_filters=128,
                        decoder_min_level=2,
                        decoder_max_level=6,
                        feature_fusion='panoptic_fpn_fusion'))),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                _COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.8,
                                                aug_scale_max=1.25)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(segmentation_resize_eval_groundtruth=False,
                              segmentation_groundtruth_padded_size=[640, 640]),
                drop_remainder=False),
            annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            segmentation_evaluation=semantic_segmentation.Evaluation(
                report_per_class_iou=False, report_train_mean_iou=False),
            panoptic_quality_evaluator=PanopticQualityEvaluator(
                num_categories=num_panoptic_categories,
                ignored_label=0,
                is_thing=is_thing,
                rescale_predictions=True)),
        trainer=cfg.TrainerConfig(
            train_steps=22500,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [15000, 20000],
                        'values': [0.12, 0.012, 0.0012],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
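
In the Model Garden, a factory like panoptic_fpn_coco is normally registered
with exp_factory and then looked up by name from a training driver. A minimal
usage sketch, assuming the factory is registered under 'panoptic_fpn_coco':

from official.core import exp_factory

# Registration is usually written as a decorator on the factory itself,
# i.e. @exp_factory.register_config_factory('panoptic_fpn_coco').
exp_factory.register_config_factory('panoptic_fpn_coco')(panoptic_fpn_coco)

# Build the config by name and override fields, e.g. for a short smoke test.
config = exp_factory.get_exp_config('panoptic_fpn_coco')
config.override({'trainer': {'train_steps': 1000}})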
Example #4
def autoseg_edgetpu_experiment_config(
        backbone_name: str,
        init_backbone: bool = True) -> cfg.ExperimentConfig:
    """Experiment using the semantic segmenatation searched model.

  Args:
    backbone_name: Name of the backbone used for this model
    init_backbone: Whether to initialize backbone from a pretrained checkpoint
  Returns:
    ExperimentConfig
  """
    epochs = 300
    train_batch_size = 64
    eval_batch_size = 32
    image_size = 512
    steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
    train_steps = epochs * steps_per_epoch
    model_config = AutosegEdgeTPUModelConfig(
        num_classes=32, input_size=[image_size, image_size, 3])
    model_config.model_params.model_name = backbone_name
    if init_backbone:
        model_config.model_params.model_weights_path = (
            BACKBONE_PRETRAINED_CHECKPOINT[backbone_name])
    model_config.model_params.overrides.resolution = image_size
    config = cfg.ExperimentConfig(
        task=AutosegEdgeTPUTaskConfig(
            model=model_config,
            train_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
                output_size=[image_size, image_size],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=2.0),
            validation_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
                output_size=[image_size, image_size],
                is_training=False,
                resize_eval_groundtruth=True,
                drop_remainder=True,
                global_batch_size=eval_batch_size),
            evaluation=base_cfg.Evaluation(report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch * 5,
            max_to_keep=10,
            train_steps=train_steps,
            validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'nesterov': True,
                        'momentum': 0.9,
                    }
                },
                'ema': {
                    'average_decay': 0.9998,
                    'trainable_weights_only': False,
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        'initial_learning_rate': 0.12,
                        'decay_steps': train_steps
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                },
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
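
# A note on the schedule chosen above (standard cosine-decay semantics, shown
# only to make the hyperparameters concrete): after the 5-epoch linear warmup,
#
#   lr(step) = 0.5 * 0.12 * (1 + cos(pi * step / train_steps))
#
# i.e. the learning rate starts at 0.12 and decays smoothly to ~0 by the end
# of training, while EMA keeps a slow copy of the weights with decay 0.9998.
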
def seg_deeplabv3plus_ade20k_32(backbone: str,
                                init_backbone: bool = True
                                ) -> cfg.ExperimentConfig:
    """Semantic segmentation on ADE20K dataset with deeplabv3+."""
    epochs = 200
    train_batch_size = 128
    eval_batch_size = 32
    image_size = 512
    steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
    aspp_dilation_rates = [5, 10, 15]
    pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
        backbone] if init_backbone else None
    config = cfg.ExperimentConfig(
        task=CustomSemanticSegmentationTaskConfig(
            model=base_cfg.SemanticSegmentationModel(
                # ADE20K uses only 32 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=32,
                input_size=[None, None, 3],
                backbone=Backbone(
                    type='mobilenet_edgetpu',
                    mobilenet_edgetpu=MobileNetEdgeTPU(
                        model_id=backbone,
                        pretrained_checkpoint_path=pretrained_checkpoint_path,
                        freeze_large_filters=500,
                    )),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(
                        level=BACKBONE_HEADPOINT[backbone],
                        use_depthwise_convolution=True,
                        dilation_rates=aspp_dilation_rates,
                        pool_kernel_size=[256, 256],
                        num_filters=128,
                        dropout_rate=0.3,
                    )),
                head=base_cfg.SegmentationHead(
                    level=BACKBONE_HEADPOINT[backbone],
                    num_convs=2,
                    num_filters=256,
                    use_depthwise_convolution=True,
                    feature_fusion='deeplabv3plus',
                    low_level=BACKBONE_LOWER_FEATURES[backbone],
                    low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='relu',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=2e-3,
                                                      use_sync_bn=False)),
            train_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
                output_size=[image_size, image_size],
                is_training=True,
                global_batch_size=train_batch_size),
            validation_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
                output_size=[image_size, image_size],
                is_training=False,
                global_batch_size=eval_batch_size,
                resize_eval_groundtruth=True,
                drop_remainder=False),
            evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
        ),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=epochs * steps_per_epoch,
            validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0001,
                        'decay_steps': epochs * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 4 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
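
A quick usage sketch for the factory above. The backbone key is hypothetical;
valid keys come from the BACKBONE_PRETRAINED_CHECKPOINT mapping defined
elsewhere in this module:

config = seg_deeplabv3plus_ade20k_32('some_edgetpu_backbone',  # placeholder
                                     init_backbone=False)
config.validate()  # checks the 'restrictions' declared in the config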