class VideoClassificationModel(hyperparams.Config):
  """Model configuration for video classification.

  Attributes:
    model_type: Identifier string of the model; defaults to
      'video_classification'.
    backbone: 3D backbone configuration; defaults to the 'resnet_3d' type
      configured with `ResNet3D50`.
    norm_activation: Normalization/activation configuration; the default is
      built with `use_sync_bn=False`.
    dropout_rate: Dropout rate; defaults to 0.2.
    aggregate_endpoints: Boolean flag, False by default.
      NOTE(review): presumably toggles aggregation of backbone endpoints --
      confirm against the model builder.
    require_endpoints: Optional tuple of endpoint names; None by default.
  """

  model_type: str = 'video_classification'
  backbone: backbones_3d.Backbone3D = backbones_3d.Backbone3D(
      type='resnet_3d',
      resnet_3d=backbones_3d.ResNet3D50(),
  )
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False,
  )
  dropout_rate: float = 0.2
  aggregate_endpoints: bool = False
  require_endpoints: Optional[Tuple[str, ...]] = None
def test_builder(self, backbone_type, input_size, weight_decay):
  """Checks that a video classification model can be built.

  Args:
    backbone_type: Backbone type string passed to `Backbone3D(type=...)`.
    input_size: Indexable whose first three entries fill the three input
      dimensions between the batch dimension and the 3 channels.
    weight_decay: L2 regularization factor; a falsy value disables the
      regularizer.
  """
  dim0 = input_size[0]
  dim1 = input_size[1]
  dim2 = input_size[2]
  # Batch dimension is left unspecified; the last dimension is fixed to 3.
  video_spec = tf.keras.layers.InputSpec(shape=[None, dim0, dim1, dim2, 3])

  cfg_under_test = video_classification_cfg.VideoClassificationModel(
      backbone=backbones_3d.Backbone3D(type=backbone_type))

  # Only create a regularizer when a non-zero weight decay is requested.
  regularizer = None
  if weight_decay:
    regularizer = tf.keras.regularizers.l2(weight_decay)

  # The test passes as long as model construction does not raise.
  factory_3d.build_video_classification_model(
      input_specs=video_spec,
      model_config=cfg_under_test,
      num_classes=2,
      l2_regularizer=regularizer)
def test_resnet_3d_creation(self, model_type):
  """Verifies that a ResNet 3D backbone can be built from its config.

  Args:
    model_type: Backbone type string used to select the backbone config.
  """
  resnet_cfg = backbones_3d_cfg.Backbone3D(type=model_type).get()

  # Walk the block specs once, then split out the per-block temporal settings.
  specs = tuple(resnet_cfg.block_specs)
  strides_per_block = [spec.temporal_strides for spec in specs]
  kernel_sizes_per_block = [spec.temporal_kernel_sizes for spec in specs]

  # Construction succeeding without an exception is the whole assertion.
  backbones.ResNet3D(
      model_id=resnet_cfg.model_id,
      temporal_strides=strides_per_block,
      temporal_kernel_sizes=kernel_sizes_per_block,
      norm_momentum=0.99,
      norm_epsilon=1e-5)
def video_classification_ucf101() -> cfg.ExperimentConfig:
  """Builds the experiment config for video classification on UCF-101.

  The model uses a 'resnet_3d' backbone configured with `ResNet3D50`. The
  train split is read from the TFDS 'ucf101' dataset split 'train', and the
  validation data from split 'test'.

  Returns:
    The experiment config, after it has been passed to `add_trainer` with a
    train batch size of 64, eval batch size of 16, learning rate of 0.8 and
    100 train epochs.
  """
  train_dataset = DataConfig(
      name='ucf101',
      num_classes=101,
      is_training=True,
      split='train',
      drop_remainder=True,
      num_examples=9537,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  train_dataset.tfds_name = 'ucf101'
  train_dataset.tfds_split = 'train'

  validation_dataset = DataConfig(
      name='ucf101',
      num_classes=101,
      # Evaluation split, so it is flagged as non-training.
      is_training=False,
      split='test',
      drop_remainder=False,
      num_examples=3783,
      temporal_stride=2,
      feature_shape=(32, 224, 224, 3))
  validation_dataset.tfds_name = 'ucf101'
  validation_dataset.tfds_split = 'test'

  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_dataset,
      validation_data=validation_dataset)

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      # Constraints on overrides: both datasets must declare is_training and
      # must agree on the number of classes.
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(
      config,
      train_batch_size=64,
      eval_batch_size=16,
      learning_rate=0.8,
      train_epochs=100)
  return config
def video_classification_kinetics700_2020() -> cfg.ExperimentConfig:
  """Builds the experiment config for Kinetics-700-2020 video classification.

  The model uses a 'resnet_3d' backbone configured with `ResNet3D50`; the
  train and validation data configs come from `kinetics700_2020`.

  Returns:
    The experiment config, after it has been passed to `add_trainer` with a
    train batch size of 1024 and an eval batch size of 64.
  """
  train_cfg = kinetics700_2020(is_training=True)
  eval_cfg = kinetics700_2020(is_training=False)

  model = VideoClassificationModel(
      backbone=backbones_3d.Backbone3D(
          type='resnet_3d',
          resnet_3d=backbones_3d.ResNet3D50()),
      norm_activation=common.NormActivation(
          norm_momentum=0.9,
          norm_epsilon=1e-5,
          use_sync_bn=False))
  task = VideoClassificationTask(
      model=model,
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_cfg,
      validation_data=eval_cfg)

  # Constraints on overrides: both datasets must declare is_training and must
  # agree on the number of classes.
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
      'task.train_data.num_classes == task.validation_data.num_classes',
  ]
  experiment = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=restrictions)
  add_trainer(experiment, train_batch_size=1024, eval_batch_size=64)
  return experiment