def test_parser(self, output_size, dtype, is_training, aug_name,
                is_multilabel, decode_jpeg_only, image_format):
  """Runs one synthetic example through Decoder + Parser and checks outputs.

  A serialized classification example is created with the requested size and
  image format, decoded, and parsed with the augmentation selected by
  `aug_name`. The test then asserts on the parsed image shape, on the label
  (value 0 for single-label, shape [10] for multilabel), and on the image
  dtype when `dtype` is one of 'float32', 'float16' or 'bfloat16'.
  """
  serialized_example = tfexample_utils.create_classification_example(
      output_size[0], output_size[1], image_format, is_multilabel)

  # Only 'randaug' and 'autoaug' map to a config; anything else disables
  # augmentation.
  if aug_name == 'randaug':
    augmentation = common.Augmentation(
        type=aug_name, randaug=common.RandAugment(magnitude=10))
  elif aug_name == 'autoaug':
    augmentation = common.Augmentation(
        type=aug_name, autoaug=common.AutoAugment(augmentation_name='test'))
  else:
    augmentation = None

  # Keyword arguments that the decoder and the parser have in common.
  shared_kwargs = dict(
      image_field_key=IMAGE_FIELD_KEY,
      label_field_key=LABEL_FIELD_KEY,
      is_multilabel=is_multilabel)
  decoder = classification_input.Decoder(**shared_kwargs)
  parser = classification_input.Parser(
      output_size=output_size[:2],
      num_classes=10,
      decode_jpeg_only=decode_jpeg_only,
      aug_rand_hflip=False,
      aug_type=augmentation,
      dtype=dtype,
      **shared_kwargs)

  decoded = decoder.decode(serialized_example)
  parse = parser.parse_fn(is_training)
  image, label = parse(decoded)

  self.assertAllEqual(image.numpy().shape, output_size)
  if is_multilabel:
    self.assertAllEqual(label.numpy().shape, [10])
  else:
    self.assertAllEqual(label, 0)

  # Dtype names outside this table are deliberately left unchecked.
  expected_dtype = {
      'float32': tf.float32,
      'float16': tf.float16,
      'bfloat16': tf.bfloat16,
  }.get(dtype)
  if expected_dtype is not None:
    self.assertAllEqual(image.dtype, expected_dtype)
class Parser(hyperparams.Config):
  """Panoptic deeplab parser."""
  # NOTE(review): this class relies on dataclass field processing (it uses
  # `dataclasses.field`); the `@dataclasses.dataclass` decorator is presumably
  # applied just above this definition - confirm.
  ignore_label: int = 0
  # If resize_eval_groundtruth is set to False, original image sizes are used
  # for eval. In that case, groundtruth_padded_size has to be specified too to
  # allow for batching the variable input sizes of images.
  resize_eval_groundtruth: bool = True
  groundtruth_padded_size: List[int] = dataclasses.field(
      default_factory=list)
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_hflip: bool = True
  # Built per instance through default_factory. A single
  # `common.Augmentation()` default would be shared by every Parser, and
  # Python 3.11+ dataclasses reject unhashable defaults with a ValueError.
  aug_type: common.Augmentation = dataclasses.field(
      default_factory=common.Augmentation)
  sigma: float = 8.0
  small_instance_area_threshold: int = 4096
  small_instance_weight: float = 3.0
  # Annotated so that it is a real dataclass field (settable through the
  # constructor) rather than a bare class attribute.
  dtype: str = 'float32'
def test_video_input_augmentation_returns_shape(self):
  """Checks parsed image and label shapes with AutoAugment configured."""
  expected_image_shape = (2, 224, 224, 3)

  params = exp_cfg.kinetics600(is_training=True)
  params.feature_shape = expected_image_shape
  params.min_image_size = 224
  params.temporal_stride = 2
  params.aug_type = common.Augmentation(
      type='autoaug', autoaug=common.AutoAugment())

  decoder = video_input.Decoder()
  parse = video_input.Parser(params).parse_fn(params.is_training)

  # Only the sequence example is used; the label comes from the parser output.
  seq_example, _ = fake_seq_example()
  serialized = tf.constant(seq_example.SerializeToString())

  features, label = parse(decoder.decode(serialized))
  image = features['image']

  self.assertAllEqual(image.shape, expected_image_shape)
  self.assertAllEqual(label.shape, (600,))
def mobilenet_edgetpu_base_experiment_config(
    model_name: str) -> cfg.ExperimentConfig:
  """Builds the shared ImageNet classification config for mobilenet_edgetpu.

  Every mobilenet_edgetpu variant uses this experiment config; only the model
  name differs.

  Args:
    model_name: Name of the mobilenet_edgetpu model variant; stored in
      `model_params.model_name` of the model config.

  Returns:
    The assembled ExperimentConfig (task, trainer and restrictions).
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  train_epochs = 550

  # Model.
  model_config = MobilenetEdgeTPUModelConfig(
      num_classes=1001, input_size=[224, 224, 3])
  model_config.model_params.model_name = model_name

  # Input pipelines.
  train_data = base_config.DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      dtype='bfloat16',
      aug_type=common.Augmentation(type='autoaug'))
  validation_data = base_config.DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
      is_training=False,
      dtype='bfloat16',
      drop_remainder=False,
      global_batch_size=eval_batch_size)

  task = MobilenetEdgeTPUTaskConfig(
      model=model_config,
      losses=base_config.Losses(label_smoothing=0.1),
      train_data=train_data,
      validation_data=validation_data)

  # RMSProp with EMA, a staircase exponential decay schedule and a linear
  # warmup over the first 5 epochs.
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'rmsprop',
          'rmsprop': {
              'rho': 0.9,
              'momentum': 0.9,
              'epsilon': 0.001,
          }
      },
      'ema': {
          'average_decay': 0.99,
          'trainable_weights_only': False,
      },
      'learning_rate': {
          'type': 'exponential',
          'exponential': {
              'initial_learning_rate': 0.008 * (train_batch_size // 128),
              'decay_steps': int(2.4 * steps_per_epoch),
              'decay_rate': 0.97,
              'staircase': True
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 5 * steps_per_epoch,
              'warmup_learning_rate': 0
          }
      },
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch * 5,
      max_to_keep=10,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
  """Builds the ImageNet classification experiment config for ResNet-RS.

  Returns:
    The assembled ExperimentConfig: a 'resnet' backbone (model_id 50) on
    160x160 inputs, RandAugment on the training data, and SGD with EMA, cosine
    decay and a 5-epoch linear warmup.
  """
  train_batch_size = 4096
  eval_batch_size = 4096
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  train_steps = 350 * steps_per_epoch

  # Model.
  backbone = backbones.Backbone(
      type='resnet',
      resnet=backbones.ResNet(
          model_id=50,
          stem_type='v1',
          resnetd_shortcut=True,
          replace_stem_max_pool=True,
          se_ratio=0.25,
          stochastic_depth_drop_rate=0.0))
  norm_activation = common.NormActivation(
      norm_momentum=0.0,
      norm_epsilon=1e-5,
      use_sync_bn=False,
      activation='swish')
  model = ImageClassificationModel(
      num_classes=1001,
      input_size=[160, 160, 3],
      backbone=backbone,
      dropout_rate=0.25,
      norm_activation=norm_activation)

  # Input pipelines.
  train_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      aug_type=common.Augmentation(
          type='randaug', randaug=common.RandAugment(magnitude=10)))
  validation_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
      is_training=False,
      global_batch_size=eval_batch_size)

  task = ImageClassificationTask(
      model=model,
      losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1),
      train_data=train_data,
      validation_data=validation_data)

  # Optimization: the cosine schedule decays over the full training run.
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {
              'momentum': 0.9
          }
      },
      'ema': {
          'average_decay': 0.9999,
          'trainable_weights_only': False,
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              'initial_learning_rate': 1.6,
              'decay_steps': train_steps
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 5 * steps_per_epoch,
              'warmup_learning_rate': 0
          }
      }
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=train_steps,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def image_classification_imagenet_deit_pretrain() -> cfg.ExperimentConfig:
  """Builds the ImageNet pretraining experiment config for a 'vit' backbone.

  Returns:
    The assembled ExperimentConfig: a 'vit-b16' vision transformer on 224x224
    inputs, soft-label losses, RandAugment plus mixup/cutmix on the training
    data, and AdamW with cosine decay and a 5-epoch linear warmup.
  """
  train_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  eval_batch_size = 4096  # originally was 1024 but 4096 better for tpu v3-32
  num_classes = 1001
  label_smoothing = 0.1
  steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size
  train_steps = 300 * steps_per_epoch

  # Model.
  vit_backbone = backbones.Backbone(
      type='vit',
      vit=backbones.VisionTransformer(
          model_name='vit-b16',
          representation_size=768,
          init_stochastic_depth_rate=0.1,
          original_init=False,
          transformer=backbones.Transformer(
              dropout_rate=0.0, attention_dropout_rate=0.0)))
  model = ImageClassificationModel(
      num_classes=num_classes,
      input_size=[224, 224, 3],
      kernel_initializer='zeros',
      backbone=vit_backbone)

  # The same label-smoothing value is passed to both the loss config and the
  # mixup/cutmix config.
  losses = Losses(
      l2_weight_decay=0.0,
      label_smoothing=label_smoothing,
      one_hot=False,
      soft_labels=True)

  # Input pipelines.
  train_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'),
      is_training=True,
      global_batch_size=train_batch_size,
      aug_type=common.Augmentation(
          type='randaug',
          randaug=common.RandAugment(magnitude=9, exclude_ops=['Cutout'])),
      mixup_and_cutmix=common.MixupAndCutmix(
          label_smoothing=label_smoothing))
  validation_data = DataConfig(
      input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'),
      is_training=False,
      global_batch_size=eval_batch_size)

  task = ImageClassificationTask(
      model=model,
      losses=losses,
      train_data=train_data,
      validation_data=validation_data)

  # Optimization: the peak learning rate is scaled by the batch size.
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'adamw',
          'adamw': {
              'weight_decay_rate': 0.05,
              'include_in_weight_decay': r'.*(kernel|weight):0$',
              'gradient_clip_norm': 0.0
          }
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              'initial_learning_rate': 0.0005 * train_batch_size / 512,
              'decay_steps': train_steps,
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 5 * steps_per_epoch,
              'warmup_learning_rate': 0
          }
      }
  })

  trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=train_steps,
      validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task,
      trainer=trainer,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab.

  Returns:
    The assembled ExperimentConfig: a 'dilated_resnet' backbone (model_id 50)
    with an ASPP decoder, separate semantic and instance heads, a panoptic
    post-processor, train/validation data parsers, evaluation settings, and an
    Adam optimizer with polynomial decay and linear warmup.
  """
  # `np.math` was only an alias of the stdlib `math` module; it was deprecated
  # in NumPy 1.25 and removed in NumPy 2.0, so use `math` directly.
  import math  # pylint: disable=g-import-not-at-top

  train_steps = 200000
  train_batch_size = 64
  eval_batch_size = 1
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size
  num_panoptic_categories = 201
  num_thing_categories = 91
  ignore_label = 0

  # Index 0 is never a thing; ids 1..num_thing_categories are things and the
  # remaining ids up to num_panoptic_categories - 1 are not.
  is_thing = [False] + [
      idx <= num_thing_categories
      for idx in range(1, num_panoptic_categories)
  ]

  input_size = [640, 640, 3]
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Feature level that corresponds to the output stride (16 -> 4).
  level = int(math.log2(output_stride))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=PanopticDeeplabTask(
          init_checkpoint=
          'gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticDeeplab(
              num_classes=num_panoptic_categories,
              input_size=input_size,
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=50,
                      stem_type=stem_type,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      se_ratio=0.25,
                      last_stage_repeats=1,
                      stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      num_filters=256,
                      pool_kernel_size=input_size[:2],
                      dilation_rates=aspp_dilation_rates,
                      use_depthwise_convolution=True,
                      dropout_rate=0.1)),
              semantic_head=SemanticHead(
                  level=level,
                  num_convs=1,
                  num_filters=256,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[64, 32],
                  fusion_num_output_filters=256,
                  prediction_kernel_size=1),
              instance_head=InstanceHead(
                  level=level,
                  num_convs=1,
                  num_filters=32,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[32, 16],
                  fusion_num_output_filters=128,
                  prediction_kernel_size=1),
              shared_decoder=False,
              generate_panoptic_masks=True,
              post_processor=PanopticDeeplabPostProcessor(
                  output_size=input_size[:2],
                  center_score_threshold=0.1,
                  # NOTE(review): this range stops at
                  # num_thing_categories - 1, while `is_thing` above marks ids
                  # up to and including num_thing_categories as things -
                  # confirm which bound is intended.
                  thing_class_ids=list(range(1, num_thing_categories)),
                  label_divisor=256,
                  stuff_area_limit=4096,
                  ignore_label=ignore_label,
                  nms_kernel=41,
                  keep_k_centers=200,
                  rescale_predictions=True)),
          losses=Losses(
              label_smoothing=0.0,
              ignore_label=ignore_label,
              l2_weight_decay=0.0,
              top_k_percent_pixels=0.2,
              segmentation_loss_weight=1.0,
              center_heatmap_loss_weight=200,
              center_offset_loss_weight=0.01),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_scale_min=0.5,
                  aug_scale_max=1.5,
                  aug_rand_hflip=True,
                  aug_type=common.Augmentation(
                      type='autoaug',
                      autoaug=common.AutoAugment(
                          augmentation_name='panoptic_deeplab_policy')),
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              # Groundtruth is not resized for eval, so a padded size is
              # supplied alongside resize_eval_groundtruth=False.
              parser=Parser(
                  resize_eval_groundtruth=False,
                  groundtruth_padded_size=[640, 640],
                  aug_scale_min=1.0,
                  aug_scale_max=1.0,
                  aug_rand_hflip=False,
                  aug_type=None,
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0),
              drop_remainder=False),
          evaluation=Evaluation(
              ignored_label=ignore_label,
              max_instances_per_category=256,
              offset=256 * 256 * 256,
              is_thing=is_thing,
              rescale_predictions=True,
              report_per_class_pq=False,
              report_per_class_iou=False,
              report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0005,
                      'decay_steps': train_steps,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config