Example #1
def simclr_pretraining_imagenet() -> cfg.ExperimentConfig:
    """Image classification general."""
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    return cfg.ExperimentConfig(
        task=SimCLRPretrainTask(
            model=SimCLRModel(
                mode=simclr_model.PRETRAIN,
                backbone_trainable=True,
                input_size=[224, 224, 3],
                backbone=backbones.Backbone(
                    type='resnet', resnet=backbones.ResNet(model_id=50)),
                projection_head=ProjectionHead(proj_output_dim=128,
                                               num_proj_layers=3,
                                               ft_proj_idx=1),
                supervised_head=SupervisedHead(num_classes=1001),
                norm_activation=common.NormActivation(norm_momentum=0.9,
                                                      norm_epsilon=1e-5,
                                                      use_sync_bn=True)),
            loss=ContrastiveLoss(),
            evaluation=Evaluation(),
            train_data=DataConfig(parser=Parser(mode=simclr_model.PRETRAIN),
                                  decoder=Decoder(decode_label=True),
                                  input_path=os.path.join(
                                      IMAGENET_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size),
            validation_data=DataConfig(
                parser=Parser(mode=simclr_model.PRETRAIN),
                decoder=Decoder(decode_label=True),
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
                is_training=False,
                global_batch_size=eval_batch_size),
        ),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=500 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'lars',
                    'lars': {
                        'momentum': 0.9,
                        'weight_decay_rate': 0.000001,
                        'exclude_from_weight_decay':
                            ['batch_normalization', 'bias']
                    }
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        # 0.2 * BatchSize / 256
                        'initial_learning_rate': 0.2 * train_batch_size / 256,
                        # train_steps - warmup_steps
                        'decay_steps': 475 * steps_per_epoch
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        # 5% of total epochs
                        'warmup_steps': 25 * steps_per_epoch
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
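A minimal usage sketch for a factory like the one above, assuming the TF Model Garden entry points exp_factory.register_config_factory and exp_factory.get_exp_config; the registration key used here is illustrative:

from official.core import exp_factory

# Register the factory under a unique key (the key name is an assumption).
exp_factory.register_config_factory('simclr_pretraining_imagenet')(
    simclr_pretraining_imagenet)

# Look the config up by name and check its declared restrictions.
config = exp_factory.get_exp_config('simclr_pretraining_imagenet')
config.validate()
print(config.trainer.train_steps)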
Example #2
def seg_resnetfpn_pascal() -> cfg.ExperimentConfig:
    """Image segmentation on pascal voc with resnet-fpn."""
    train_batch_size = 256
    eval_batch_size = 32
    steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                num_classes=21,
                input_size=[512, 512, 3],
                min_level=3,
                max_level=7,
                backbone=backbones.Backbone(
                    type='resnet', resnet=backbones.ResNet(model_id=50)),
                decoder=decoders.Decoder(type='fpn', fpn=decoders.FPN()),
                head=SegmentationHead(level=3, num_convs=3),
                norm_activation=common.NormActivation(activation='swish',
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'train_aug*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.2,
                                  aug_scale_max=1.5),
            validation_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=False,
                                       groundtruth_padded_size=[512, 512],
                                       drop_remainder=False),
        ),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=450 * steps_per_epoch,
            validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.007,
                        'decay_steps': 450 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
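As a hedged aside, a config built this way can be overridden after construction through the hyperparams override API; the values below are illustrative:

config = seg_resnetfpn_pascal()
# Override nested fields from a plain dict; with is_strict=True every key
# must already exist in the config.
config.override({'trainer': {'train_steps': 1000}}, is_strict=True)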
Example #3
def mnv2_deeplabv3_cityscapes() -> cfg.ExperimentConfig:
    """Image segmentation on cityscapes with mobilenetv2 deeplabv3."""
    train_batch_size = 16
    eval_batch_size = 16
    steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = []
    pool_kernel_size = [512, 1024]

    level = int(np.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                # Cityscapes uses only 19 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=19,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(type='mobilenet',
                                            mobilenet=backbones.MobileNet(
                                                model_id='MobileNetV2',
                                                output_stride=output_stride)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       pool_kernel_size=pool_kernel_size)),
                head=SegmentationHead(level=level, num_convs=0),
                norm_activation=common.NormActivation(activation='relu',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=4e-5),
            train_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'),
                                  crop_size=[512, 1024],
                                  output_size=[1024, 2048],
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.5,
                                  aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
                                       output_size=[1024, 2048],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=True,
                                       drop_remainder=False),
            # Coco pre-trained mobilenetv2 checkpoint
            init_checkpoint=
            'gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=100000,
            validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            best_checkpoint_eval_metric='mean_iou',
            best_checkpoint_export_subdir='best_ckpt',
            best_checkpoint_metric_comp='higher',
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.01,
                        'decay_steps': 100000,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #4
def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with RetinaNet using SpineNet backbone."""
    train_batch_size = 256
    eval_batch_size = 8
    steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
    input_size = 640

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'),
        task=RetinaNetTask(
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=RetinaNet(backbone=backbones.Backbone(
                type='spinenet',
                spinenet=backbones.SpineNet(model_id='49',
                                            stochastic_depth_drop_rate=0.2)),
                            decoder=decoders.Decoder(
                                type='identity', identity=decoders.Identity()),
                            anchor=Anchor(anchor_size=3),
                            norm_activation=common.NormActivation(
                                use_sync_bn=True, activation='swish'),
                            num_classes=91,
                            input_size=[input_size, input_size, 3],
                            min_level=3,
                            max_level=7),
            losses=Losses(l2_weight_decay=4e-5),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.1,
                                                aug_scale_max=2.0)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            train_steps=500 * steps_per_epoch,
            validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [475 * steps_per_epoch, 490 * steps_per_epoch],
                        'values': [
                            0.32 * train_batch_size / 256.0,
                            0.032 * train_batch_size / 256.0,
                            0.0032 * train_batch_size / 256.0
                        ],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #5
def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Mask R-CNN with deep mask heads."""
    global_batch_size = 64
    steps_per_epoch = int(retinanet_config.COCO_TRAIN_EXAMPLES /
                          global_batch_size)
    coco_val_samples = 5000

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=DeepMaskHeadRCNNTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
            init_checkpoint_modules='backbone',
            annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=DeepMaskHeadRCNN(num_classes=91,
                                   input_size=[1024, 1024, 3],
                                   include_mask=True),  # pytype: disable=wrong-keyword-args
            losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
            train_data=maskrcnn_config.DataConfig(
                input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                        'train*'),
                is_training=True,
                global_batch_size=global_batch_size,
                parser=maskrcnn_config.Parser(aug_rand_hflip=True,
                                              aug_scale_min=0.8,
                                              aug_scale_max=1.25)),
            validation_data=maskrcnn_config.DataConfig(input_path=os.path.join(
                maskrcnn_config.COCO_INPUT_PATH_BASE, 'val*'),
                                                       is_training=False,
                                                       global_batch_size=8)),  # pytype: disable=wrong-keyword-args
        trainer=cfg.TrainerConfig(
            train_steps=22500,
            validation_steps=coco_val_samples // 8,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [15000, 20000],
                        'values': [0.12, 0.012, 0.0012],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #6
def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig:
    """Image classification on imagenet with mobilenet."""
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    config = cfg.ExperimentConfig(
        task=ImageClassificationTask(
            model=ImageClassificationModel(
                num_classes=1001,
                dropout_rate=0.2,
                input_size=[224, 224, 3],
                backbone=backbones.Backbone(type='mobilenet',
                                            mobilenet=backbones.MobileNet(
                                                model_id='MobileNetV2',
                                                filter_size_scale=1.0)),
                norm_activation=common.NormActivation(norm_momentum=0.997,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=False)),
            losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1),
            train_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size),
            validation_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'valid*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=500 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'rmsprop',
                    'rmsprop': {
                        'rho': 0.9,
                        'momentum': 0.9,
                        'epsilon': 0.002,
                    }
                },
                'learning_rate': {
                    'type': 'exponential',
                    'exponential': {
                        'initial_learning_rate':
                        0.008 * (train_batch_size // 128),
                        'decay_steps': int(2.5 * steps_per_epoch),
                        'decay_rate': 0.98,
                        'staircase': True
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                },
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #7
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Deeplab."""
    train_steps = 200000
    train_batch_size = 64
    eval_batch_size = 1
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    num_panoptic_categories = 201
    num_thing_categories = 91
    ignore_label = 0

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(idx <= num_thing_categories)

    input_size = [640, 640, 3]
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.log2(output_stride))

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16',
                                  enable_xla=True),
        task=PanopticDeeplabTask(
            init_checkpoint=
            'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticDeeplab(
                num_classes=num_panoptic_categories,
                input_size=input_size,
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=50,
                        stem_type=stem_type,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        se_ratio=0.25,
                        last_stage_repeats=1,
                        stochastic_depth_drop_rate=0.2)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       num_filters=256,
                                       pool_kernel_size=input_size[:2],
                                       dilation_rates=aspp_dilation_rates,
                                       use_depthwise_convolution=True,
                                       dropout_rate=0.1)),
                semantic_head=SemanticHead(level=level,
                                           num_convs=1,
                                           num_filters=256,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[64, 32],
                                           fusion_num_output_filters=256,
                                           prediction_kernel_size=1),
                instance_head=InstanceHead(level=level,
                                           num_convs=1,
                                           num_filters=32,
                                           kernel_size=5,
                                           use_depthwise_convolution=True,
                                           upsample_factor=1,
                                           low_level=[3, 2],
                                           low_level_num_filters=[32, 16],
                                           fusion_num_output_filters=128,
                                           prediction_kernel_size=1),
                shared_decoder=False,
                generate_panoptic_masks=True,
                post_processor=PanopticDeeplabPostProcessor(
                    output_size=input_size[:2],
                    center_score_threshold=0.1,
                    thing_class_ids=list(range(1, num_thing_categories)),
                    label_divisor=256,
                    stuff_area_limit=4096,
                    ignore_label=ignore_label,
                    nms_kernel=41,
                    keep_k_centers=200,
                    rescale_predictions=True)),
            losses=Losses(label_smoothing=0.0,
                          ignore_label=ignore_label,
                          l2_weight_decay=0.0,
                          top_k_percent_pixels=0.2,
                          segmentation_loss_weight=1.0,
                          center_heatmap_loss_weight=200,
                          center_offset_loss_weight=0.01),
            train_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=Parser(
                    aug_scale_min=0.5,
                    aug_scale_max=1.5,
                    aug_rand_hflip=True,
                    aug_type=common.Augmentation(
                        type='autoaug',
                        autoaug=common.AutoAugment(
                            augmentation_name='panoptic_deeplab_policy')),
                    sigma=8.0,
                    small_instance_area_threshold=4096,
                    small_instance_weight=3.0)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(resize_eval_groundtruth=False,
                              groundtruth_padded_size=[640, 640],
                              aug_scale_min=1.0,
                              aug_scale_max=1.0,
                              aug_rand_hflip=False,
                              aug_type=None,
                              sigma=8.0,
                              small_instance_area_threshold=4096,
                              small_instance_weight=3.0),
                drop_remainder=False),
            evaluation=Evaluation(ignored_label=ignore_label,
                                  max_instances_per_category=256,
                                  offset=256 * 256 * 256,
                                  is_thing=is_thing,
                                  rescale_predictions=True,
                                  report_per_class_pq=False,
                                  report_per_class_iou=False,
                                  report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            train_steps=train_steps,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0005,
                        'decay_steps': train_steps,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
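For experiment tracking, a config like the one above serializes to a plain dict and from there to YAML; a short sketch, assuming PyYAML is available:

import yaml

config = panoptic_deeplab_coco()
# as_dict() flattens the nested config dataclasses into plain containers.
print(yaml.dump(config.as_dict(), default_flow_style=False))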
Example #8
def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Faster R-CNN."""
    steps_per_epoch = 500
    coco_val_samples = 5000
    train_batch_size = 64
    eval_batch_size = 8

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=MaskRCNNTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
            init_checkpoint_modules='backbone',
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=MaskRCNN(num_classes=91,
                           input_size=[1024, 1024, 3],
                           include_mask=False,
                           mask_head=None,
                           mask_sampler=None,
                           mask_roi_aligner=None),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.8,
                                                aug_scale_max=1.25)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       drop_remainder=False)),
        trainer=cfg.TrainerConfig(
            train_steps=22500,
            validation_steps=coco_val_samples // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [15000, 20000],
                        'values': [0.12, 0.012, 0.0012],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Example #9
def cascadercnn_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Cascade RCNN-RS with SpineNet backbone."""
    steps_per_epoch = 463
    coco_val_samples = 5000
    train_batch_size = 256
    eval_batch_size = 8

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=MaskRCNNTask(
            annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            model=MaskRCNN(
                backbone=backbones.Backbone(type='spinenet',
                                            spinenet=backbones.SpineNet(
                                                model_id='49',
                                                min_level=3,
                                                max_level=7,
                                            )),
                decoder=decoders.Decoder(type='identity',
                                         identity=decoders.Identity()),
                roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]),
                detection_head=DetectionHead(class_agnostic_bbox_pred=True,
                                             cascade_class_ensemble=True),
                anchor=Anchor(anchor_size=3),
                norm_activation=common.NormActivation(use_sync_bn=True,
                                                      activation='swish'),
                num_classes=91,
                input_size=[640, 640, 3],
                min_level=3,
                max_level=7,
                include_mask=True),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.1,
                                                aug_scale_max=2.5)),
            validation_data=DataConfig(input_path=os.path.join(
                COCO_INPUT_PATH_BASE, 'val*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       drop_remainder=False)),
        trainer=cfg.TrainerConfig(
            train_steps=steps_per_epoch * 500,
            validation_steps=coco_val_samples // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [steps_per_epoch * 475, steps_per_epoch * 490],
                        'values': [0.32, 0.032, 0.0032],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.model.min_level == task.model.backbone.spinenet.min_level',
            'task.model.max_level == task.model.backbone.spinenet.max_level',
        ])
    return config
Example #10
def autoseg_edgetpu_experiment_config(
        backbone_name: str,
        init_backbone: bool = True) -> cfg.ExperimentConfig:
    """Experiment using the semantic segmenatation searched model.

  Args:
    backbone_name: Name of the backbone used for this model
    init_backbone: Whether to initialize backbone from a pretrained checkpoint
  Returns:
    ExperimentConfig
  """
    epochs = 300
    train_batch_size = 64
    eval_batch_size = 32
    image_size = 512
    steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
    train_steps = epochs * steps_per_epoch
    model_config = AutosegEdgeTPUModelConfig(
        num_classes=32, input_size=[image_size, image_size, 3])
    model_config.model_params.model_name = backbone_name
    if init_backbone:
        model_config.model_params.model_weights_path = (
            BACKBONE_PRETRAINED_CHECKPOINT[backbone_name])
    model_config.model_params.overrides.resolution = image_size
    config = cfg.ExperimentConfig(
        task=AutosegEdgeTPUTaskConfig(
            model=model_config,
            train_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
                output_size=[image_size, image_size],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=2.0),
            validation_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
                output_size=[image_size, image_size],
                is_training=False,
                resize_eval_groundtruth=True,
                drop_remainder=True,
                global_batch_size=eval_batch_size),
            evaluation=base_cfg.Evaluation(report_train_mean_iou=False)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch * 5,
            max_to_keep=10,
            train_steps=train_steps,
            validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'nesterov': True,
                        'momentum': 0.9,
                    }
                },
                'ema': {
                    'average_decay': 0.9998,
                    'trainable_weights_only': False,
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        'initial_learning_rate': 0.12,
                        'decay_steps': train_steps
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                },
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Example #11
def seg_deeplabv3plus_ade20k_32(backbone: str,
                                init_backbone: bool = True
                                ) -> cfg.ExperimentConfig:
    """Semantic segmentation on ADE20K dataset with deeplabv3+."""
    epochs = 200
    train_batch_size = 128
    eval_batch_size = 32
    image_size = 512
    steps_per_epoch = ADE20K_TRAIN_EXAMPLES // train_batch_size
    aspp_dilation_rates = [5, 10, 15]
    pretrained_checkpoint_path = BACKBONE_PRETRAINED_CHECKPOINT[
        backbone] if init_backbone else None
    config = cfg.ExperimentConfig(
        task=CustomSemanticSegmentationTaskConfig(
            model=base_cfg.SemanticSegmentationModel(
                # ADE20K uses only 32 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=32,
                input_size=[None, None, 3],
                backbone=Backbone(
                    type='mobilenet_edgetpu',
                    mobilenet_edgetpu=MobileNetEdgeTPU(
                        model_id=backbone,
                        pretrained_checkpoint_path=pretrained_checkpoint_path,
                        freeze_large_filters=500,
                    )),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(
                        level=BACKBONE_HEADPOINT[backbone],
                        use_depthwise_convolution=True,
                        dilation_rates=aspp_dilation_rates,
                        pool_kernel_size=[256, 256],
                        num_filters=128,
                        dropout_rate=0.3,
                    )),
                head=base_cfg.SegmentationHead(
                    level=BACKBONE_HEADPOINT[backbone],
                    num_convs=2,
                    num_filters=256,
                    use_depthwise_convolution=True,
                    feature_fusion='deeplabv3plus',
                    low_level=BACKBONE_LOWER_FEATURES[backbone],
                    low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='relu',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=2e-3,
                                                      use_sync_bn=False)),
            train_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'train-*'),
                output_size=[image_size, image_size],
                is_training=True,
                global_batch_size=train_batch_size),
            validation_data=base_cfg.DataConfig(
                input_path=os.path.join(ADE20K_INPUT_PATH_BASE, 'val-*'),
                output_size=[image_size, image_size],
                is_training=False,
                global_batch_size=eval_batch_size,
                resize_eval_groundtruth=True,
                drop_remainder=False),
            evaluation=base_cfg.Evaluation(report_train_mean_iou=False),
        ),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=epochs * steps_per_epoch,
            validation_steps=ADE20K_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adam',
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.0001,
                        'decay_steps': epochs * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 4 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #12
from official.nlp.data import pretrain_dataloader
from official.nlp.data import pretrain_dynamic_dataloader
from official.nlp.tasks import masked_lm

_TRAINER = cfg.TrainerConfig(train_steps=1000000,
                             optimizer_config=optimization.OptimizationConfig({
                                 'optimizer': {
                                     'type': 'adamw',
                                     'adamw': {
                                         'weight_decay_rate': 0.01,
                                         'exclude_from_weight_decay':
                                             ['LayerNorm', 'layer_norm', 'bias'],
                                     }
                                 },
                                 'learning_rate': {
                                     'type': 'polynomial',
                                     'polynomial': {
                                         'initial_learning_rate': 1e-4,
                                         'end_learning_rate': 0.0,
                                     }
                                 },
                                 'warmup': {
                                     'type': 'polynomial'
                                 }
                             }))


@exp_factory.register_config_factory('bert/pretraining')
def bert_pretraining() -> cfg.ExperimentConfig:
    """BERT pretraining experiment."""
Example #13
def maskrcnn_mobilenet_coco() -> cfg.ExperimentConfig:
  """COCO object detection with Mask R-CNN with MobileNet backbone."""
  steps_per_epoch = 232
  coco_val_samples = 5000
  train_batch_size = 512
  eval_batch_size = 512

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=MaskRCNNTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=MaskRCNN(
              backbone=backbones.Backbone(
                  type='mobilenet',
                  mobilenet=backbones.MobileNet(model_id='MobileNetV2')),
              decoder=decoders.Decoder(
                  type='fpn',
                  fpn=decoders.FPN(num_filters=128, use_separable_conv=True)),
              rpn_head=RPNHead(use_separable_conv=True,
                               num_filters=128),  # 1/2 of original channels.
              detection_head=DetectionHead(
                  use_separable_conv=True, num_filters=128,
                  fc_dims=512),  # 1/2 of original channels.
              mask_head=MaskHead(use_separable_conv=True,
                                 num_filters=128),  # 1/2 of original channels.
              anchor=Anchor(anchor_size=3),
              norm_activation=common.NormActivation(
                  activation='relu6',
                  norm_momentum=0.99,
                  norm_epsilon=0.001,
                  use_sync_bn=True),
              num_classes=91,
              input_size=[512, 512, 3],
              min_level=3,
              max_level=6,
              include_mask=True),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          train_steps=steps_per_epoch * 350,
          validation_steps=coco_val_samples // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          steps_per_epoch * 320, steps_per_epoch * 340
                      ],
                      'values': [0.32, 0.032, 0.0032],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
      ])
  return config
Example #14
def simclr_finetuning_imagenet() -> cfg.ExperimentConfig:
    """Image classification general."""
    train_batch_size = 1024
    eval_batch_size = 1024
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    pretrain_model_base = ''
    return cfg.ExperimentConfig(
        task=SimCLRFinetuneTask(
            model=SimCLRModel(
                mode=simclr_model.FINETUNE,
                backbone_trainable=True,
                input_size=[224, 224, 3],
                backbone=backbones.Backbone(
                    type='resnet', resnet=backbones.ResNet(model_id=50)),
                projection_head=ProjectionHead(proj_output_dim=128,
                                               num_proj_layers=3,
                                               ft_proj_idx=1),
                supervised_head=SupervisedHead(num_classes=1001,
                                               zero_init=True),
                norm_activation=common.NormActivation(norm_momentum=0.9,
                                                      norm_epsilon=1e-5,
                                                      use_sync_bn=False)),
            loss=ClassificationLosses(),
            evaluation=Evaluation(),
            train_data=DataConfig(parser=Parser(mode=simclr_model.FINETUNE),
                                  input_path=os.path.join(
                                      IMAGENET_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size),
            validation_data=DataConfig(
                parser=Parser(mode=simclr_model.FINETUNE),
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
                is_training=False,
                global_batch_size=eval_batch_size),
            init_checkpoint=pretrain_model_base,
            # all, backbone_projection or backbone
            init_checkpoint_modules='backbone_projection'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=60 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'lars',
                    'lars': {
                        'momentum': 0.9,
                        'weight_decay_rate': 0.0,
                        'exclude_from_weight_decay':
                            ['batch_normalization', 'bias']
                    }
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        # 0.01 * BatchSize / 512
                        'initial_learning_rate': 0.01 * train_batch_size / 512,
                        'decay_steps': 60 * steps_per_epoch
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
Example #15
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
    """Image classification on imagenet with resnet-rs."""
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    config = cfg.ExperimentConfig(
        task=ImageClassificationTask(
            model=ImageClassificationModel(
                num_classes=1001,
                input_size=[160, 160, 3],
                backbone=backbones.Backbone(
                    type='resnet',
                    resnet=backbones.ResNet(model_id=50,
                                            stem_type='v1',
                                            resnetd_shortcut=True,
                                            replace_stem_max_pool=True,
                                            se_ratio=0.25,
                                            stochastic_depth_drop_rate=0.0)),
                dropout_rate=0.25,
                norm_activation=common.NormActivation(norm_momentum=0.0,
                                                      norm_epsilon=1e-5,
                                                      use_sync_bn=False,
                                                      activation='swish')),
            losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
            train_data=DataConfig(
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                aug_type=common.Augmentation(
                    type='randaug', randaug=common.RandAugment(magnitude=10))),
            validation_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'valid*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=350 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'ema': {
                    'average_decay': 0.9999,
                    'trainable_weights_only': False,
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        'initial_learning_rate': 1.6,
                        'decay_steps': 350 * steps_per_epoch
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
Example #16
def panoptic_maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Mask R-CNN."""
  train_batch_size = 64
  eval_batch_size = 8
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=PanopticMaskRCNNTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticMaskRCNN(
              num_classes=91, input_size=[1024, 1024, 3],
              segmentation_model=SEGMENTATION_MODEL(
                  num_classes=91,
                  head=SEGMENTATION_HEAD(level=3))),
          losses=Losses(l2_weight_decay=0.00004),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=False),
          annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json')),
      trainer=cfg.TrainerConfig(
          train_steps=22500,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [15000, 20000],
                      'values': [0.12, 0.012, 0.0012],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
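Finally, a hedged sketch of driving one of these configs end to end with the Model Garden training driver; the distribution strategy and model_dir are placeholders:

import tensorflow as tf
from official.core import task_factory
from official.core import train_lib

config = panoptic_maskrcnn_resnetfpn_coco()
model_dir = '/tmp/model_dir'  # placeholder
strategy = tf.distribute.MirroredStrategy()  # placeholder strategy
with strategy.scope():
    task = task_factory.get_task(config.task, logging_dir=model_dir)
train_lib.run_experiment(distribution_strategy=strategy,
                         task=task,
                         mode='train_and_eval',
                         params=config,
                         model_dir=model_dir)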
Example #17
def image_classification_imagenet_revnet() -> cfg.ExperimentConfig:
    """Returns a revnet config for image classification on imagenet."""
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size

    config = cfg.ExperimentConfig(
        task=ImageClassificationTask(
            model=ImageClassificationModel(
                num_classes=1001,
                input_size=[224, 224, 3],
                backbone=backbones.Backbone(
                    type='revnet', revnet=backbones.RevNet(model_id=56)),
                norm_activation=common.NormActivation(norm_momentum=0.9,
                                                      norm_epsilon=1e-5,
                                                      use_sync_bn=False),
                add_head_batch_norm=True),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size),
            validation_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'valid*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=90 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [
                            30 * steps_per_epoch, 60 * steps_per_epoch,
                            80 * steps_per_epoch
                        ],
                        'values': [0.8, 0.08, 0.008, 0.0008]
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #18
def image_classification_imagenet_vit_pretrain() -> cfg.ExperimentConfig:
    """Image classification on imagenet with vision transformer."""
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    config = cfg.ExperimentConfig(
        task=ImageClassificationTask(
            model=ImageClassificationModel(num_classes=1001,
                                           input_size=[224, 224, 3],
                                           kernel_initializer='zeros',
                                           backbone=backbones.Backbone(
                                               type='vit',
                                               vit=backbones.VisionTransformer(
                                                   model_name='vit-b16',
                                                   representation_size=768))),
            losses=Losses(l2_weight_decay=0.0),
            train_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size),
            validation_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'valid*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=300 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adamw',
                    'adamw': {
                        'weight_decay_rate': 0.3,
                        'include_in_weight_decay': r'.*(kernel|weight):0$',
                        'gradient_clip_norm': 0.0
                    }
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        'initial_learning_rate':
                        0.003 * train_batch_size / 4096,
                        'decay_steps': 300 * steps_per_epoch,
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 10000,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #19
def wmt_transformer_large() -> cfg.ExperimentConfig:
  """WMT Transformer Large.

  Please refer to
  tensorflow_models/official/nlp/data/train_sentencepiece.py
  to generate sentencepiece_model
  and pass
  --params_override=task.sentencepiece_model_path='YOUR_PATH'
  to the train script.
  """
  learning_rate = 2.0
  hidden_size = 1024
  learning_rate *= (hidden_size**-0.5)
  warmup_steps = 16000
  train_steps = 300000
  token_batch_size = 24576
  encdecoder = translation.EncDecoder(
      num_attention_heads=16, intermediate_size=hidden_size * 4)
  config = cfg.ExperimentConfig(
      task=translation.TranslationConfig(
          model=translation.ModelConfig(
              encoder=encdecoder,
              decoder=encdecoder,
              embedding_width=hidden_size,
              padded_decode=True,
              decode_max_length=100),
          train_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='train',
              src_lang='en',
              tgt_lang='de',
              is_training=True,
              global_batch_size=token_batch_size,
              static_batch=True,
              max_seq_length=64
          ),
          validation_data=wmt_dataloader.WMTDataConfig(
              tfds_name='wmt14_translate/de-en',
              tfds_split='test',
              src_lang='en',
              tgt_lang='de',
              is_training=False,
              global_batch_size=32,
              static_batch=True,
              max_seq_length=100,
          ),
          sentencepiece_model_path=None,
      ),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=-1,
          steps_per_loop=1000,
          summary_interval=1000,
          checkpoint_interval=5000,
          validation_interval=5000,
          max_to_keep=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
                  'adam': {
                      'beta_2': 0.997,
                      'epsilon': 1e-9,
                  },
              },
              'learning_rate': {
                  'type': 'power',
                  'power': {
                      'initial_learning_rate': learning_rate,
                      'power': -0.5,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': warmup_steps,
                      'warmup_learning_rate': 0.0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.sentencepiece_model_path != None',
      ])
  return config
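
The 'power' schedule above (power -0.5, seeded with 2.0 * hidden_size**-0.5) combined with a 16000-step linear warmup is the familiar inverse-square-root Transformer schedule. A compact sketch, assuming the linear warmup ramps up to the power schedule's value at warmup_steps, which reduces the pair to the Noam schedule:

def transformer_lr(step, hidden_size=1024, warmup_steps=16000, scale=2.0):
    """Inverse-square-root schedule with linear warmup (Noam-style)."""
    step = max(step, 1)
    base = scale * hidden_size ** -0.5  # 0.0625 for hidden_size=1024
    return base * min(step ** -0.5, step * warmup_steps ** -1.5)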
Example #20
def image_classification_imagenet_deit_pretrain() -> cfg.ExperimentConfig:
    """Image classification on imagenet with vision transformer."""
    train_batch_size = 4096  # originally 1024; 4096 works better on TPU v3-32
    eval_batch_size = 4096  # originally 1024; 4096 works better on TPU v3-32
    num_classes = 1001
    label_smoothing = 0.1
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    config = cfg.ExperimentConfig(
        task=ImageClassificationTask(
            model=ImageClassificationModel(
                num_classes=num_classes,
                input_size=[224, 224, 3],
                kernel_initializer='zeros',
                backbone=backbones.Backbone(
                    type='vit',
                    vit=backbones.VisionTransformer(
                        model_name='vit-b16',
                        representation_size=768,
                        init_stochastic_depth_rate=0.1,
                        original_init=False,
                        transformer=backbones.Transformer(
                            dropout_rate=0.0, attention_dropout_rate=0.0)))),
            losses=Losses(l2_weight_decay=0.0,
                          label_smoothing=label_smoothing,
                          one_hot=False,
                          soft_labels=True),
            train_data=DataConfig(
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                aug_type=common.Augmentation(type='randaug',
                                             randaug=common.RandAugment(
                                                 magnitude=9,
                                                 exclude_ops=['Cutout'])),
                mixup_and_cutmix=common.MixupAndCutmix(
                    label_smoothing=label_smoothing)),
            validation_data=DataConfig(input_path=os.path.join(
                IMAGENET_INPUT_PATH_BASE, 'valid*'),
                                       is_training=False,
                                       global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=300 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'adamw',
                    'adamw': {
                        'weight_decay_rate': 0.05,
                        'include_in_weight_decay': r'.*(kernel|weight):0$',
                        'gradient_clip_norm': 0.0
                    }
                },
                'learning_rate': {
                    'type': 'cosine',
                    'cosine': {
                        'initial_learning_rate':
                        0.0005 * train_batch_size / 512,
                        'decay_steps': 300 * steps_per_epoch,
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
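
Both ViT-style recipes above scale the base learning rate linearly with the global batch size: 0.003 at a reference batch of 4096 for the ViT pretraining config, 0.0005 at a reference batch of 512 for this DeiT-style config. A one-line sketch of the linear scaling rule:

import math

def scaled_base_lr(reference_lr, reference_batch_size, batch_size):
    """Linear learning-rate scaling: the rate grows with the batch size."""
    return reference_lr * batch_size / reference_batch_size

# DeiT-style reference: 0.0005 at batch 512 becomes 0.004 at batch 4096.
assert math.isclose(scaled_base_lr(0.0005, 512, 4096), 0.004)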
Example #21
def seg_deeplabv3plus_scooter() -> cfg.ExperimentConfig:
  """Image segmentation on scooter dataset with resnet deeplabv3+.
  Barebones config for testing purpose (modify batch size, initial lr, steps per epoch, train input path, val input path)
  """
  scooter_path_glob = 'D:/data/test_data/val**'
  steps_per_epoch = 1
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.math.log2(output_stride))
  
  config = cfg.ExperimentConfig(
      task=SemanticSegmentationTask(
          model=SemanticSegmentationModel(
              num_classes=19,
              input_size=[512, 512, 3], # specifying this speeds up model inference, no change in size
              backbone=backbones.Backbone(
                  type='dilated_resnet', dilated_resnet=backbones.DilatedResNet(
                      model_id=101, output_stride=output_stride,
                      stem_type=stem_type, multigrid=multigrid)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level, dilation_rates=aspp_dilation_rates)),
              head=SegmentationHead(
                  level=level,
                  num_convs=2,
                  feature_fusion='deeplabv3plus',
                  low_level=2,
                  low_level_num_filters=48),
              norm_activation=common.NormActivation(
                  activation='swish',
                  norm_momentum=0.99,
                  norm_epsilon=1e-3,
                  use_sync_bn=True)),
          losses=Losses(
              l2_weight_decay=1e-4,
              ignore_label=250),
          train_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=True,
              global_batch_size=1,
              aug_scale_min=0.5,
              aug_scale_max=2.0),
          validation_data=DataConfig(
              input_path=scooter_path_glob,
              output_size=[512, 512],
              is_training=False,
              global_batch_size=1,
              resize_eval_groundtruth=True,
              drop_remainder=False)),
          # Optional: initialize from a pretrained resnet101 checkpoint by
          # adding these arguments to SemanticSegmentationTask above, e.g.
          # init_checkpoint='D:/repos/data_root/test_data/deeplab_cityscapes_pretrained/model.ckpt',
          # init_checkpoint_modules='all'
          # or
          # init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
          # init_checkpoint_modules='backbone'
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=500 * steps_per_epoch,
          validation_steps=1021,
          validation_interval=steps_per_epoch,
          continuous_eval_timeout=1,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.007,
                      'decay_steps': 500 * steps_per_epoch,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

  return config
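
The level passed to the ASPP decoder and segmentation head above is derived from the backbone output stride: a feature map at pyramid level L is downsampled by 2**L relative to the input, so level = log2(output_stride). A small sketch using the stdlib math module (np.math in these configs was an alias for the same module and is deprecated in recent NumPy releases):

import math

def feature_level(output_stride):
    """Pyramid level whose downsampling factor equals the output stride."""
    level = int(math.log2(output_stride))
    assert 2 ** level == output_stride, 'output_stride must be a power of two'
    return level

assert feature_level(16) == 4  # the config above
assert feature_level(8) == 3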
Example #22
  def prepare_config(self, teacher_block_num, student_block_num,
                     transfer_teacher_layers):
    # using small model for testing
    task_config = distillation.BertDistillationTaskConfig(
        teacher_model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(
                type='mobilebert',
                mobilebert=encoders.MobileBertEncoderConfig(
                    num_blocks=teacher_block_num)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=256,
                    num_classes=2,
                    dropout_rate=0.1,
                    name='next_sentence')
            ],
            mlm_activation='gelu'),
        student_model=bert.PretrainerConfig(
            encoder=encoders.EncoderConfig(
                type='mobilebert',
                mobilebert=encoders.MobileBertEncoderConfig(
                    num_blocks=student_block_num)),
            cls_heads=[
                bert.ClsHeadConfig(
                    inner_dim=256,
                    num_classes=2,
                    dropout_rate=0.1,
                    name='next_sentence')
            ],
            mlm_activation='relu'),
        train_data=pretrain_dataloader.BertPretrainDataConfig(
            input_path='dummy',
            max_predictions_per_seq=76,
            seq_length=512,
            global_batch_size=10),
        validation_data=pretrain_dataloader.BertPretrainDataConfig(
            input_path='dummy',
            max_predictions_per_seq=76,
            seq_length=512,
            global_batch_size=10))

    # set only 1 step for each stage
    progressive_config = distillation.BertDistillationProgressiveConfig()
    progressive_config.layer_wise_distill_config.transfer_teacher_layers = (
        transfer_teacher_layers)
    progressive_config.layer_wise_distill_config.num_steps = 1
    progressive_config.pretrain_distill_config.num_steps = 1

    optimization_config = optimization.OptimizationConfig(
        optimizer=optimization.OptimizerConfig(
            type='lamb',
            lamb=optimization.LAMBConfig(
                weight_decay_rate=0.0001,
                exclude_from_weight_decay=[
                    'LayerNorm', 'layer_norm', 'bias', 'no_norm'
                ])),
        learning_rate=optimization.LrConfig(
            type='polynomial',
            polynomial=optimization.PolynomialLrConfig(
                initial_learning_rate=1.5e-3,
                decay_steps=10000,
                end_learning_rate=1.5e-3)),
        warmup=optimization.WarmupConfig(
            type='linear',
            linear=optimization.LinearWarmupConfig(warmup_learning_rate=0)))

    exp_config = cfg.ExperimentConfig(
        task=task_config,
        trainer=prog_trainer_lib.ProgressiveTrainerConfig(
            progressive=progressive_config,
            optimizer_config=optimization_config))

    # Create a teacher model checkpoint.
    teacher_encoder = encoders.build_encoder(task_config.teacher_model.encoder)
    pretrainer_config = task_config.teacher_model
    if pretrainer_config.cls_heads:
      teacher_cls_heads = [
          # Avoid shadowing the `cfg` config module inside the comprehension.
          layers.ClassificationHead(**head_cfg.as_dict())
          for head_cfg in pretrainer_config.cls_heads
      ]
    else:
      teacher_cls_heads = []

    masked_lm = layers.MobileBertMaskedLM(
        embedding_table=teacher_encoder.get_embedding_table(),
        activation=tf_utils.get_activation(pretrainer_config.mlm_activation),
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=pretrainer_config.mlm_initializer_range),
        name='cls/predictions')
    teacher_pretrainer = models.BertPretrainerV2(
        encoder_network=teacher_encoder,
        classification_heads=teacher_cls_heads,
        customized_masked_lm=masked_lm)

    # The model variables will be created after the forward call.
    _ = teacher_pretrainer(teacher_pretrainer.inputs)
    teacher_pretrainer_ckpt = tf.train.Checkpoint(
        **teacher_pretrainer.checkpoint_items)
    teacher_ckpt_path = os.path.join(self.get_temp_dir(), 'teacher_model.ckpt')
    teacher_pretrainer_ckpt.save(teacher_ckpt_path)
    exp_config.task.teacher_model_init_checkpoint = self.get_temp_dir()

    return exp_config
Example #23
def deep_mask_head_rcnn_spinenet_coco() -> cfg.ExperimentConfig:
    """COCO object detection with Mask R-CNN with SpineNet backbone."""
    steps_per_epoch = 463
    coco_val_samples = 5000
    train_batch_size = 256
    eval_batch_size = 8

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
        task=DeepMaskHeadRCNNTask(
            annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),  # pytype: disable=wrong-keyword-args
            model=DeepMaskHeadRCNN(
                backbone=backbones.Backbone(type='spinenet',
                                            spinenet=backbones.SpineNet(
                                                model_id='49',
                                                min_level=3,
                                                max_level=7,
                                            )),
                decoder=decoders.Decoder(type='identity',
                                         identity=decoders.Identity()),
                anchor=maskrcnn_config.Anchor(anchor_size=3),
                norm_activation=common.NormActivation(use_sync_bn=True),
                num_classes=91,
                input_size=[640, 640, 3],
                min_level=3,
                max_level=7,
                include_mask=True),  # pytype: disable=wrong-keyword-args
            losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
            train_data=maskrcnn_config.DataConfig(
                input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                        'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                parser=maskrcnn_config.Parser(aug_rand_hflip=True,
                                              aug_scale_min=0.5,
                                              aug_scale_max=2.0)),
            validation_data=maskrcnn_config.DataConfig(
                input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
                                        'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                drop_remainder=False)),  # pytype: disable=wrong-keyword-args
        trainer=cfg.TrainerConfig(
            train_steps=steps_per_epoch * 350,
            validation_steps=coco_val_samples // eval_batch_size,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries':
                        [steps_per_epoch * 320, steps_per_epoch * 340],
                        'values': [0.32, 0.032, 0.0032],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 2000,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None',
            'task.model.min_level == task.model.backbone.spinenet.min_level',
            'task.model.max_level == task.model.backbone.spinenet.max_level',
        ])
    return config
Example #24
def panoptic_fpn_coco() -> cfg.ExperimentConfig:
    """COCO panoptic segmentation with Panoptic Mask R-CNN."""
    train_batch_size = 64
    eval_batch_size = 8
    steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
    validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

    # The COCO panoptic dataset has category ids in [0, 200].
    # Id 0 is unused and represents the background class.
    # Ids 1-91 are the "thing" categories (91 classes);
    # ids 92-200 are the "stuff" categories (109 classes).
    # For the segmentation task we keep id=0 for the background and map all
    # thing categories to id=1. The remaining 109 stuff categories are
    # shifted down by an offset of 90 (num_thing_categories - 1), so the
    # stuff ids span 2-110 (see the sketch after this example).
    num_panoptic_categories = 201
    num_thing_categories = 91
    num_semantic_segmentation_classes = 111

    is_thing = [False]
    for idx in range(1, num_panoptic_categories):
        is_thing.append(idx <= num_thing_categories)

    config = cfg.ExperimentConfig(
        runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32',
                                  enable_xla=True),
        task=PanopticMaskRCNNTask(
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',  # pylint: disable=line-too-long
            init_checkpoint_modules=['backbone'],
            model=PanopticMaskRCNN(
                num_classes=91,
                input_size=[1024, 1024, 3],
                panoptic_segmentation_generator=PanopticSegmentationGenerator(
                    output_size=[640, 640], rescale_predictions=True),
                stuff_classes_offset=90,
                segmentation_model=SEGMENTATION_MODEL(
                    num_classes=num_semantic_segmentation_classes,
                    head=SEGMENTATION_HEAD(
                        level=2,
                        num_convs=0,
                        num_filters=128,
                        decoder_min_level=2,
                        decoder_max_level=6,
                        feature_fusion='panoptic_fpn_fusion'))),
            losses=Losses(l2_weight_decay=0.00004),
            train_data=DataConfig(input_path=os.path.join(
                _COCO_INPUT_PATH_BASE, 'train*'),
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  parser=Parser(aug_rand_hflip=True,
                                                aug_scale_min=0.8,
                                                aug_scale_max=1.25)),
            validation_data=DataConfig(
                input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
                is_training=False,
                global_batch_size=eval_batch_size,
                parser=Parser(segmentation_resize_eval_groundtruth=False,
                              segmentation_groundtruth_padded_size=[640, 640]),
                drop_remainder=False),
            annotation_file=os.path.join(_COCO_INPUT_PATH_BASE,
                                         'instances_val2017.json'),
            segmentation_evaluation=semantic_segmentation.Evaluation(
                report_per_class_iou=False, report_train_mean_iou=False),
            panoptic_quality_evaluator=PanopticQualityEvaluator(
                num_categories=num_panoptic_categories,
                ignored_label=0,
                is_thing=is_thing,
                rescale_predictions=True)),
        trainer=cfg.TrainerConfig(
            train_steps=22500,
            validation_steps=validation_steps,
            validation_interval=steps_per_epoch,
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'stepwise',
                    'stepwise': {
                        'boundaries': [15000, 20000],
                        'values': [0.12, 0.012, 0.0012],
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 500,
                        'warmup_learning_rate': 0.0067
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
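
The id arithmetic described in the comments at the top of this example can be checked in isolation. A hypothetical helper (for illustration only; not part of the Model Garden API) that applies the same remapping to a single panoptic category id:

def remap_panoptic_id(panoptic_id, num_thing_categories=91,
                      stuff_classes_offset=90):
    """Maps a COCO panoptic id into the 111-class semantic label space."""
    if panoptic_id == 0:
        return 0                               # background stays 0
    if panoptic_id <= num_thing_categories:
        return 1                               # all thing ids collapse to 1
    return panoptic_id - stuff_classes_offset  # stuff ids 92-200 -> 2-110

assert remap_panoptic_id(0) == 0
assert all(remap_panoptic_id(i) == 1 for i in range(1, 92))
assert remap_panoptic_id(92) == 2 and remap_panoptic_id(200) == 110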
Example #25
def seg_deeplabv3_pascal() -> cfg.ExperimentConfig:
    """Image segmentation on pascal voc with resnet deeplabv3."""
    train_batch_size = 16
    eval_batch_size = 8
    steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    # DeepLabv3 recommends [6, 12, 18] at output_stride=16;
    # [12, 24, 36] corresponds to output_stride=8.
    aspp_dilation_rates = [12, 24, 36]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                num_classes=21,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        multigrid=multigrid,
                        stem_type=stem_type)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates)),
                head=SegmentationHead(level=level, num_convs=0),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.9997,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(
                input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
                # TODO(arashwan): test changing size to 513 to match deeplab.
                output_size=[512, 512],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'val*'),
                                       output_size=[512, 512],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=False,
                                       groundtruth_padded_size=[512, 512],
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=45 * steps_per_epoch,
            validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.007,
                        'decay_steps': 45 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
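
The polynomial schedule above is the standard DeepLab 'poly' learning-rate policy: decay from 0.007 towards 0 with power 0.9 over the whole run. A sketch following tf.keras.optimizers.schedules.PolynomialDecay semantics:

def polynomial_lr(step, decay_steps, initial_lr=0.007, end_lr=0.0, power=0.9):
    """Polynomial decay from initial_lr to end_lr over decay_steps."""
    progress = min(step / decay_steps, 1.0)
    return (initial_lr - end_lr) * (1.0 - progress) ** power + end_lr

# Halfway through training the rate has decayed to roughly
# 0.007 * 0.5 ** 0.9 ≈ 0.00375.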
Example #26
def mobilenet_edgetpu_base_experiment_config(
        model_name: str) -> cfg.ExperimentConfig:
    """Image classification on imagenet with mobilenet_edgetpu.

  Experiment config common across all mobilenet_edgetpu variants.
  Args:
    model_name: Name of the mobilenet_edgetpu model variant
  Returns:
    ExperimentConfig
  """
    train_batch_size = 4096
    eval_batch_size = 4096
    steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
    mobilenet_edgetpu_config = MobilenetEdgeTPUModelConfig(
        num_classes=1001, input_size=[224, 224, 3])
    mobilenet_edgetpu_config.model_params.model_name = model_name
    config = cfg.ExperimentConfig(
        task=MobilenetEdgeTPUTaskConfig(
            model=mobilenet_edgetpu_config,
            losses=base_config.Losses(label_smoothing=0.1),
            train_data=base_config.DataConfig(
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
                is_training=True,
                global_batch_size=train_batch_size,
                dtype='bfloat16',
                aug_type=common.Augmentation(type='autoaug')),
            validation_data=base_config.DataConfig(
                input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
                is_training=False,
                dtype='bfloat16',
                drop_remainder=False,
                global_batch_size=eval_batch_size)),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch * 5,
            max_to_keep=10,
            train_steps=550 * steps_per_epoch,
            validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'rmsprop',
                    'rmsprop': {
                        'rho': 0.9,
                        'momentum': 0.9,
                        'epsilon': 0.001,
                    }
                },
                'ema': {
                    'average_decay': 0.99,
                    'trainable_weights_only': False,
                },
                'learning_rate': {
                    'type': 'exponential',
                    'exponential': {
                        'initial_learning_rate':
                        0.008 * (train_batch_size // 128),
                        'decay_steps': int(2.4 * steps_per_epoch),
                        'decay_rate': 0.97,
                        'staircase': True
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                },
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])
    return config
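
The schedule above is staircase exponential decay: the rate starts at 0.008 * (4096 // 128) = 0.256 and is multiplied by 0.97 once per decay interval of int(2.4 * steps_per_epoch) steps. A sketch matching tf.keras ExponentialDecay with staircase=True:

def exponential_staircase_lr(step, initial_lr, decay_steps, decay_rate=0.97):
    """Cuts the rate by decay_rate once per full decay_steps interval."""
    return initial_lr * decay_rate ** (step // decay_steps)

# After ten decay intervals: 0.256 * 0.97 ** 10 ≈ 0.189.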
Example #27
def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig:
    """Image segmentation on cityscapes with resnet deeplabv3+."""
    train_batch_size = 16
    eval_batch_size = 16
    steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
    output_stride = 16
    aspp_dilation_rates = [6, 12, 18]
    multigrid = [1, 2, 4]
    stem_type = 'v1'
    level = int(np.math.log2(output_stride))
    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                # Cityscapes uses only 19 semantic classes for train/evaluation.
                # The void (background) class is ignored in train and evaluation.
                num_classes=19,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_resnet',
                    dilated_resnet=backbones.DilatedResNet(
                        model_id=101,
                        output_stride=output_stride,
                        stem_type=stem_type,
                        multigrid=multigrid)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       pool_kernel_size=[512, 1024])),
                head=SegmentationHead(level=level,
                                      num_convs=2,
                                      feature_fusion='deeplabv3plus',
                                      low_level=2,
                                      low_level_num_filters=48),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.99,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'),
                                  crop_size=[512, 1024],
                                  output_size=[1024, 2048],
                                  is_training=True,
                                  global_batch_size=train_batch_size,
                                  aug_scale_min=0.5,
                                  aug_scale_max=2.0),
            validation_data=DataConfig(input_path=os.path.join(
                CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
                                       output_size=[1024, 2048],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=True,
                                       drop_remainder=False),
            # resnet101
            init_checkpoint=
            'gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=500 * steps_per_epoch,
            validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.01,
                        'decay_steps': 500 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config
Example #28
from official.modeling import performance
from official.modeling.fast_training.progressive import train_lib
from official.modeling.fast_training.progressive import trainer as prog_trainer_lib
from official.nlp.data import pretrain_dataloader
from official.nlp.projects.mobilebert import distillation

FLAGS = flags.FLAGS

optimization_config = optimization.OptimizationConfig(
    optimizer=optimization.OptimizerConfig(
        type='lamb',
        lamb=optimization.LAMBConfig(
            weight_decay_rate=0.01,
            exclude_from_weight_decay=['LayerNorm', 'bias', 'norm'],
            clipnorm=1.0)),
    learning_rate=optimization.LrConfig(
        type='polynomial',
        polynomial=optimization.PolynomialLrConfig(
            initial_learning_rate=1.5e-3,
            decay_steps=10000,
            end_learning_rate=1.5e-3)),
    warmup=optimization.WarmupConfig(
        type='linear',
        linear=optimization.LinearWarmupConfig(warmup_learning_rate=0)))


# Copied from progressive/utils.py due to the private-visibility issue.
def config_override(params, flags_obj):
    """Override ExperimentConfig according to flags."""
    # Change runtime.tpu to the real tpu.
    params.override({'runtime': {
        'tpu': flags_obj.tpu,
Example #29
def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
  """COCO object detection with RetinaNet."""
  train_batch_size = 256
  eval_batch_size = 8
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=RetinaNetTask(
          init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080',
          init_checkpoint_modules='backbone',
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=RetinaNet(
              num_classes=91,
              input_size=[640, 640, 3],
              min_level=3,
              max_level=7),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          train_steps=72 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [
                          57 * steps_per_epoch, 67 * steps_per_epoch
                      ],
                      'values': [
                          0.32 * train_batch_size / 256.0,
                          0.032 * train_batch_size / 256.0,
                          0.0032 * train_batch_size / 256.0
                      ],
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 500,
                      'warmup_learning_rate': 0.0067
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])

  return config
Example #30
def seg_deeplabv2_pascal() -> cfg.ExperimentConfig:
    """Image segmentation on imagenet with vggnet & resnet deeplabv2."""
    train_batch_size = 16
    eval_batch_size = 8
    steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size

    # for Large FOV (unused below; this config uses the ASPP decoder instead)
    fov_dilation_rates = 12
    kernel_size = 3

    # for ASPP
    aspp_dilation_rates = [6, 12, 18, 24]

    output_stride = 16
    level = int(np.math.log2(output_stride))

    config = cfg.ExperimentConfig(
        task=SemanticSegmentationTask(
            model=SemanticSegmentationModel(
                num_classes=21,
                input_size=[None, None, 3],
                backbone=backbones.Backbone(
                    type='dilated_vggnet',
                    dilated_vggnet=backbones.DilatedVGGNet(model_id=16)),
                decoder=decoders.Decoder(
                    type='aspp',
                    aspp=decoders.ASPP(level=level,
                                       dilation_rates=aspp_dilation_rates,
                                       stem_type='v2',
                                       num_filters=1024,
                                       use_sync_bn=True)),
                head=SegmentationHead(level=level,
                                      num_convs=0,
                                      low_level_num_filters=1024,
                                      feature_fusion='deeplabv2'),
                norm_activation=common.NormActivation(activation='swish',
                                                      norm_momentum=0.9997,
                                                      norm_epsilon=1e-3,
                                                      use_sync_bn=True)),
            losses=Losses(l2_weight_decay=1e-4),
            train_data=DataConfig(
                input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'),
                # TODO(arashwan): test changing size to 513 to match deeplab.
                output_size=[512, 512],
                is_training=True,
                global_batch_size=train_batch_size,
                aug_scale_min=0.5,
                aug_scale_max=1.5),
            validation_data=DataConfig(input_path=os.path.join(
                PASCAL_INPUT_PATH_BASE, 'val*'),
                                       output_size=[512, 512],
                                       is_training=False,
                                       global_batch_size=eval_batch_size,
                                       resize_eval_groundtruth=False,
                                       groundtruth_padded_size=[512, 512],
                                       drop_remainder=False),
            # vggnet16 deeplab pretrained checkpoint
            init_checkpoint='/home/gunho1123/ckpt_vggnet16_deeplab/',
            init_checkpoint_modules='backbone'),
        trainer=cfg.TrainerConfig(
            steps_per_loop=steps_per_epoch,
            summary_interval=steps_per_epoch,
            checkpoint_interval=steps_per_epoch,
            train_steps=45 * steps_per_epoch,
            validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size,
            validation_interval=steps_per_epoch,
            optimizer_config=optimization.OptimizationConfig({
                'optimizer': {
                    'type': 'sgd',
                    'sgd': {
                        'momentum': 0.9
                    }
                },
                'learning_rate': {
                    'type': 'polynomial',
                    'polynomial': {
                        'initial_learning_rate': 0.007,
                        'decay_steps': 45 * steps_per_epoch,
                        'end_learning_rate': 0.0,
                        'power': 0.9
                    }
                },
                'warmup': {
                    'type': 'linear',
                    'linear': {
                        'warmup_steps': 5 * steps_per_epoch,
                        'warmup_learning_rate': 0
                    }
                }
            })),
        restrictions=[
            'task.train_data.is_training != None',
            'task.validation_data.is_training != None'
        ])

    return config