def main(_) -> None:
  """Converts a '3d_2plus1d' MoViNet checkpoint into a '2plus1d' checkpoint.

  Builds both classifier variants from the command-line flags, restores the
  '3d_2plus1d' weights from `FLAGS.input_checkpoint_path`, reshapes every
  weight whose shape differs so that it fits the '2plus1d' model, optionally
  checks that both models agree on a random input, and finally saves the
  '2plus1d' model and backbone under `FLAGS.output_checkpoint_path`.

  Args:
    _: Unused positional command-line arguments.

  Raises:
    ValueError: If `FLAGS.verify_output` is set and the mean absolute
      difference between the two models' outputs exceeds 1e-5.
  """
  backbone_2plus1d = movinet.Movinet(
      model_id=FLAGS.model_id,
      causal=FLAGS.causal,
      conv_type='2plus1d',
      use_positional_encoding=FLAGS.use_positional_encoding)
  model_2plus1d = movinet_model.MovinetClassifier(
      backbone=backbone_2plus1d,
      num_classes=FLAGS.num_classes)
  model_2plus1d.build([1, 1, 1, 1, 3])

  backbone_3d_2plus1d = movinet.Movinet(
      model_id=FLAGS.model_id,
      causal=FLAGS.causal,
      conv_type='3d_2plus1d',
      use_positional_encoding=FLAGS.use_positional_encoding)
  model_3d_2plus1d = movinet_model.MovinetClassifier(
      backbone=backbone_3d_2plus1d,
      num_classes=FLAGS.num_classes)
  model_3d_2plus1d.build([1, 1, 1, 1, 3])

  checkpoint = tf.train.Checkpoint(model=model_3d_2plus1d)
  status = checkpoint.restore(FLAGS.input_checkpoint_path)
  status.assert_existing_objects_matched()

  # Walk both weight lists in lockstep; this relies on the two models exposing
  # their weights in the same order.
  weights = []
  for var_2plus1d, var_3d_2plus1d in zip(
      model_2plus1d.get_weights(), model_3d_2plus1d.get_weights()):
    if var_2plus1d.shape == var_3d_2plus1d.shape:
      weights.append(var_3d_2plus1d)
      continue

    # Shapes differ: drop one axis of the source weight, either a leading
    # axis of size 1 or index 0 of the second axis.
    # NOTE(review): presumably the singleton kernel axis of the 3D conv --
    # confirm against the layer definitions.
    if var_3d_2plus1d.shape[0] == 1:
      weight = var_3d_2plus1d[0]
    else:
      weight = var_3d_2plus1d[:, 0]
    if weight.shape[-1] != var_2plus1d.shape[-1]:
      # Transpose any depthwise kernels (conv3d --> depthwise_conv2d)
      weight = tf.transpose(weight, perm=(0, 1, 3, 2))
    weights.append(weight)

  model_2plus1d.set_weights(weights)

  if FLAGS.verify_output:
    inputs = tf.random.uniform([1, 6, 64, 64, 3], dtype=tf.float32)

    logits_2plus1d = model_2plus1d(inputs)
    logits_3d_2plus1d = model_3d_2plus1d(inputs)

    # Compare absolute differences: a signed mean lets positive and negative
    # errors cancel and never trips the threshold when the mean is negative.
    mean_abs_error = tf.reduce_mean(
        tf.abs(logits_2plus1d - logits_3d_2plus1d))
    if mean_abs_error > 1e-5:
      raise ValueError('Bad conversion, model outputs do not match.')

  save_checkpoint = tf.train.Checkpoint(
      model=model_2plus1d, backbone=backbone_2plus1d)
  save_checkpoint.save(FLAGS.output_checkpoint_path)
def test_movinet_classifier_stream_pos_enc(self):
  """Checks streaming output equals full-clip output with pos encoding on."""
  tf.keras.backend.set_image_data_format('channels_last')

  classifier = movinet_model.MovinetClassifier(
      movinet.Movinet(
          model_id='a0',
          causal=True,
          use_external_states=True,
          use_positional_encoding=True,
      ),
      num_classes=600,
      output_states=True)

  clip = tf.ones([1, 8, 172, 172, 3])
  initial_states = classifier.init_states(tf.shape(clip))

  # Reference result: the whole clip in a single call.
  expected, _ = classifier({**initial_states, 'image': clip})

  # Streaming result: one frame per call, feeding the returned states back in.
  # Only the output of the final frame is compared.
  stream_states = initial_states
  for single_frame in tf.split(clip, clip.shape[1], axis=1):
    predicted, stream_states = classifier(
        {**stream_states, 'image': single_frame})

  self.assertEqual(predicted.shape, expected.shape)
  self.assertAllClose(predicted, expected, 1e-5, 1e-5)
def test_convert_model(self):
  """Runs the converter end to end and checks an output checkpoint exists."""
  saved_model_path = self.get_temp_dir()
  input_checkpoint_path = os.path.join(saved_model_path, 'ckpt-input')
  output_checkpoint_path = os.path.join(saved_model_path, 'ckpt')

  # Write a '3d_2plus1d' checkpoint for the converter to read.
  source_backbone = movinet.Movinet(model_id='a0', conv_type='3d_2plus1d')
  source_model = movinet_model.MovinetClassifier(
      backbone=source_backbone, num_classes=600)
  source_model.build([1, 1, 1, 1, 3])
  tf.train.Checkpoint(model=source_model).save(input_checkpoint_path)

  # Point the converter's flags at the checkpoint that was just written.
  flag_overrides = {
      'input_checkpoint_path': f'{input_checkpoint_path}-1',
      'output_checkpoint_path': output_checkpoint_path,
      'model_id': 'a0',
      'use_positional_encoding': False,
      'num_classes': 600,
      'verify_output': True,
  }
  for flag_name, flag_value in flag_overrides.items():
    setattr(FLAGS, flag_name, flag_value)

  convert_3d_2plus1d.main('unused_args')

  print(os.listdir(saved_model_path))

  index_file = f'{output_checkpoint_path}-1.index'
  self.assertTrue(tf.io.gfile.exists(index_file))
def test_movinet_classifier_mobile(self):
  """Checks streaming matches full-clip output with the mobile settings."""
  tf.keras.backend.set_image_data_format('channels_last')

  mobile_backbone = movinet.Movinet(
      model_id='a0',
      causal=True,
      use_external_states=True,
      conv_type='2plus1d',
      se_type='2plus3d',
      activation='hard_swish',
      gating_activation='hard_sigmoid')
  classifier = movinet_model.MovinetClassifier(
      mobile_backbone, num_classes=600, output_states=True)

  clip = tf.ones([1, 8, 172, 172, 3])
  initial_states = classifier.init_states(tf.shape(clip))

  # Reference result: the whole clip in a single call.
  expected, _ = classifier({**initial_states, 'image': clip})

  # Streaming result: one frame per call, feeding the returned states back in.
  # Only the output of the final frame is compared.
  stream_states = initial_states
  for single_frame in tf.split(clip, clip.shape[1], axis=1):
    predicted, stream_states = classifier(
        {**stream_states, 'image': single_frame})

  self.assertEqual(predicted.shape, expected.shape)
  self.assertAllClose(predicted, expected, 1e-5, 1e-5)
def test_movinet_a0_2plus1d(self):
  """Checks '2plus1d' and '3d_2plus1d' a0 models agree given shared weights."""
  tf.keras.backend.set_image_data_format('channels_last')

  def build_classifier(conv_type):
    # Both variants differ only in the backbone's `conv_type`.
    classifier = movinet_model.MovinetClassifier(
        backbone=movinet.Movinet(model_id='a0', conv_type=conv_type),
        num_classes=600)
    classifier.build([1, 1, 1, 1, 3])
    return classifier

  model_2plus1d = build_classifier('2plus1d')
  model_3d_2plus1d = build_classifier('3d_2plus1d')

  # Copy the '3d_2plus1d' weights into the '2plus1d' model, reshaping any
  # weight whose shape differs between the two variants.
  converted = []
  for target, source in zip(
      model_2plus1d.get_weights(), model_3d_2plus1d.get_weights()):
    if target.shape == source.shape:
      converted.append(source)
      continue
    # Drop one axis: a leading axis of size 1, else index 0 of the second axis.
    reduced = source[0] if source.shape[0] == 1 else source[:, 0]
    if reduced.shape[-1] != target.shape[-1]:
      # Depthwise kernels need their last two axes swapped
      # (conv3d --> depthwise_conv2d).
      reduced = tf.transpose(reduced, perm=(0, 1, 3, 2))
    converted.append(reduced)
  model_2plus1d.set_weights(converted)

  inputs = tf.ones([2, 8, 172, 172, 3], dtype=tf.float32)
  logits_2plus1d = model_2plus1d(inputs)
  logits_3d_2plus1d = model_3d_2plus1d(inputs)

  # With identical weights the two variants should produce identical logits.
  self.assertAllEqual(logits_2plus1d.shape, logits_3d_2plus1d.shape)
  self.assertAllClose(logits_2plus1d, logits_3d_2plus1d, 1e-5, 1e-5)
def test_movinet_models(self, model_id, expected_params_millions):
  """Checks the parameter count (in millions) of a causal MoViNet classifier.

  Args:
    model_id: MoViNet variant identifier passed to the backbone.
    expected_params_millions: Expected `count_params() / 1e6` for that variant.
  """
  tf.keras.backend.set_image_data_format('channels_last')

  causal_backbone = movinet.Movinet(model_id=model_id, causal=True)
  classifier = movinet_model.MovinetClassifier(
      backbone=causal_backbone, num_classes=600)
  classifier.build([1, 1, 1, 1, 3])

  self.assertEqual(classifier.count_params() / 1e6, expected_params_millions)
def test_saved_model_save_load(self):
  """Checks the classifier can be saved with Keras, reloaded and called."""
  backbone = movinet.Movinet('a0')
  model = movinet_model.MovinetClassifier(backbone, num_classes=600)
  model.build([1, 5, 172, 172, 3])
  model.compile(metrics=['acc'])

  # Save into this test's own temporary directory instead of a fixed
  # '/tmp/...' path, so that runs cannot collide or pick up stale files and
  # the test does not depend on '/tmp' existing.
  save_dir = self.get_temp_dir()
  tf.keras.models.save_model(model, save_dir)
  loaded_model = tf.keras.models.load_model(save_dir)

  output = loaded_model(dict(image=tf.ones([1, 1, 1, 1, 3])))

  self.assertAllEqual(output.shape, [1, 600])
def test_serialize_deserialize(self):
  """Checks that a classifier rebuilt from its config has the same config."""
  original = movinet_model.MovinetClassifier(
      backbone=movinet.Movinet(model_id='a0'), num_classes=1000)

  rebuilt = movinet_model.MovinetClassifier.from_config(original.get_config())

  # The rebuilt model must also be convertible to JSON without raising.
  rebuilt.to_json()

  # A successful round trip leaves the two configs identical.
  self.assertAllEqual(original.get_config(), rebuilt.get_config())
def test_movinet_classifier_creation(self, is_training):
  """Checks a MoViNet classifier builds and yields [batch, classes] logits.

  Args:
    is_training: Value forwarded as the `training` argument of the model call.
  """
  tf.keras.backend.set_image_data_format('channels_last')

  num_classes = 1000
  # Per-example dimensions: 16 frames of 224x224 pixels with 3 channels.
  clip_dims = [16, 224, 224, 3]

  input_specs = tf.keras.layers.InputSpec(shape=[None] + clip_dims)
  classifier = movinet_model.MovinetClassifier(
      backbone=movinet.Movinet(model_id='a0', input_specs=input_specs),
      num_classes=num_classes,
      input_specs={'image': input_specs},
      dropout_rate=0.2)

  batch = np.random.rand(2, *clip_dims)
  logits = classifier(batch, training=is_training)

  self.assertAllEqual([2, num_classes], logits.shape)
def main(argv: Sequence[str]) -> None:
  """Builds a MoViNet classifier from flags and exports it as a SavedModel.

  When `FLAGS.causal` is set, the model is wrapped in a `tf.Module` exposing
  `__call__`, `base` (both run the model with an empty state dict) and
  `stream` (runs the model with an explicit state dict), and is written with
  `tf.saved_model.save`. Otherwise the Keras model itself is saved with
  `tf.keras.models.save_model`.

  Args:
    argv: Positional command-line arguments; at most one entry is accepted.

  Raises:
    app.UsageError: If more than one positional argument is given.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  # Use dimensions of 1 except the channels to export faster,
  # since we only really need the last dimension to build and get the output
  # states. These dimensions will be set to `None` once the model is built.
  input_shape = [1, 1, 1, 1, 3]

  backbone = movinet.Movinet(
      FLAGS.model_id, causal=FLAGS.causal, conv_type=FLAGS.conv_type)
  model = movinet_model.MovinetClassifier(
      backbone, num_classes=FLAGS.num_classes, output_states=FLAGS.causal)
  model.build(input_shape)

  if FLAGS.checkpoint_path:
    model.load_weights(FLAGS.checkpoint_path)

  if FLAGS.causal:
    # Call the model once to get the output states. Call again with `states`
    # input to ensure that the inputs with the `states` argument is built
    _, states = model(dict(image=tf.ones(input_shape), states={}))
    _, states = model(dict(image=tf.ones(input_shape), states=states))

    input_spec = tf.TensorSpec(
        shape=[None, None, None, None, 3],
        dtype=tf.float32,
        name='inputs')

    state_specs = {}
    for name, state in states.items():
      shape = state.shape
      if len(state.shape) == 5:
        # For rank-5 states keep only axis 1 and the last axis fixed; every
        # other axis is left unspecified in the exported signature.
        shape = [None, state.shape[1], None, None, state.shape[-1]]
      new_spec = tf.TensorSpec(shape=shape, dtype=state.dtype, name=name)
      state_specs[name] = new_spec

    specs = (input_spec, state_specs)

    # Define a tf.keras.Model with custom signatures to allow it to accept
    # a state dict as an argument. We define it inline here because
    # we first need to determine the shape of the state tensors before
    # applying the `input_signature` argument to `tf.function`.
    class ExportStateModule(tf.Module):
      """Module with state for exporting to saved_model."""

      def __init__(self, model):
        # Initialise the tf.Module base class before attaching attributes.
        super().__init__()
        self.model = model

      @tf.function(input_signature=[input_spec])
      def __call__(self, inputs):
        return self.model(dict(image=inputs, states={}))

      @tf.function(input_signature=[input_spec])
      def base(self, inputs):
        return self.model(dict(image=inputs, states={}))

      @tf.function(input_signature=specs)
      def stream(self, inputs, states):
        return self.model(dict(image=inputs, states=states))

    module = ExportStateModule(model)

    tf.saved_model.save(module, FLAGS.output_path)
  else:
    # Call the model once on a dummy input before saving it.
    _ = model(tf.ones(input_shape))
    tf.keras.models.save_model(model, FLAGS.output_path)

  print(' ----- Done. Saved Model is saved at {}'.format(FLAGS.output_path))
def main(_) -> None:
  """Builds a MoViNet classifier from flags and exports it with Keras.

  The model is built, compiled and optionally restored from
  `FLAGS.checkpoint_path`. When `FLAGS.causal` is set, it is saved with two
  named signatures: 'call' (takes the state tensors plus 'image' and returns
  the states plus 'logits') and 'init_states' (takes a length-5 int32 shape
  tensor). Otherwise the model is saved without custom signatures.

  Args:
    _: Unused positional command-line arguments.
  """
  input_specs = tf.keras.layers.InputSpec(shape=[
      FLAGS.batch_size,
      FLAGS.num_frames,
      FLAGS.image_size,
      FLAGS.image_size,
      3,
  ])

  # Concrete shape used to build and trace the model: every dimension that the
  # flags leave as `None` is replaced by 1.
  input_shape = [dim if dim is not None else 1 for dim in input_specs.shape]

  backbone_options = dict(
      causal=FLAGS.causal,
      conv_type=FLAGS.conv_type,
      use_external_states=FLAGS.causal,
      input_specs=input_specs,
      activation=FLAGS.activation,
      gating_activation=FLAGS.gating_activation,
      se_type=FLAGS.se_type,
      use_positional_encoding=FLAGS.use_positional_encoding)
  backbone = movinet.Movinet(FLAGS.model_id, **backbone_options)
  model = movinet_model.MovinetClassifier(
      backbone,
      num_classes=FLAGS.num_classes,
      output_states=FLAGS.causal,
      input_specs=dict(image=input_specs))
  model.build(input_shape)

  # Compile model to generate some internal Keras variables.
  model.compile()

  if FLAGS.checkpoint_path:
    restore_status = tf.train.Checkpoint(model=model).restore(
        FLAGS.checkpoint_path)
    restore_status.assert_existing_objects_matched()

  if not FLAGS.causal:
    # Call the model once on a dummy input before saving it.
    _ = model(tf.ones(input_shape))
    tf.keras.models.save_model(model, FLAGS.export_path)
  else:
    # Run the model twice: first from freshly initialised states, then again
    # with the states it returned, so that it has also been called with the
    # full output state shapes.
    input_image = tf.ones(input_shape)
    first_call_inputs = {
        **model.init_states(input_shape),
        'image': input_image,
    }
    _, states = model(first_call_inputs)
    _, states = model({**states, 'image': input_image})

    # Create a function to explicitly set the names of the outputs
    def predict(inputs):
      logits, new_states = model(inputs)
      named_outputs = dict(new_states)
      named_outputs['logits'] = logits
      return named_outputs

    # One named TensorSpec per state tensor, plus one for the image itself.
    specs = {}
    state_specs = model.initial_state_specs(input_specs.shape)
    for name, spec in state_specs.items():
      specs[name] = tf.TensorSpec(spec.shape, name=name, dtype=spec.dtype)
    specs['image'] = tf.TensorSpec(
        input_specs.shape, dtype=model.dtype, name='image')

    predict_fn = tf.function(
        predict, jit_compile=True).get_concrete_function(specs)

    shape_spec = tf.TensorSpec([5], dtype=tf.int32)
    init_states_fn = tf.function(
        model.init_states, jit_compile=True).get_concrete_function(shape_spec)

    signatures = {'call': predict_fn, 'init_states': init_states_fn}

    tf.keras.models.save_model(
        model, FLAGS.export_path, signatures=signatures)

  print(' ----- Done. Saved Model is saved at {}'.format(FLAGS.export_path))