コード例 #1
0
def main(_) -> None:
    """Converts a '3d_2plus1d' MoViNet checkpoint to the '2plus1d' layout.

    Two classifiers are built from the same flags, differing only in
    `conv_type`. The '3d_2plus1d' model is restored from
    `FLAGS.input_checkpoint_path`, its weights are copied (reshaped where the
    shapes differ) into the '2plus1d' model, and the '2plus1d' model is saved
    to `FLAGS.output_checkpoint_path`.

    Args:
      _: Unused positional argument.

    Raises:
      ValueError: If `FLAGS.verify_output` is set and the mean absolute
        difference between the two models' logits exceeds 1e-5.
    """
    backbone_2plus1d = movinet.Movinet(
        model_id=FLAGS.model_id,
        causal=FLAGS.causal,
        conv_type='2plus1d',
        se_type=FLAGS.se_type,
        use_positional_encoding=FLAGS.use_positional_encoding)
    model_2plus1d = movinet_model.MovinetClassifier(
        backbone=backbone_2plus1d, num_classes=FLAGS.num_classes)
    model_2plus1d.build([1, 1, 1, 1, 3])

    backbone_3d_2plus1d = movinet.Movinet(
        model_id=FLAGS.model_id,
        causal=FLAGS.causal,
        conv_type='3d_2plus1d',
        se_type=FLAGS.se_type,
        use_positional_encoding=FLAGS.use_positional_encoding)
    model_3d_2plus1d = movinet_model.MovinetClassifier(
        backbone=backbone_3d_2plus1d, num_classes=FLAGS.num_classes)
    model_3d_2plus1d.build([1, 1, 1, 1, 3])

    # Only the '3d_2plus1d' model is restored; the '2plus1d' model receives
    # its weights from the conversion loop below.
    checkpoint = tf.train.Checkpoint(model=model_3d_2plus1d)
    status = checkpoint.restore(FLAGS.input_checkpoint_path)
    status.assert_existing_objects_matched()

    # Ensure both models have the same weights
    weights = []
    for var_2plus1d, var_3d_2plus1d in zip(model_2plus1d.get_weights(),
                                           model_3d_2plus1d.get_weights()):
        if var_2plus1d.shape == var_3d_2plus1d.shape:
            weights.append(var_3d_2plus1d)
        else:
            # Shapes differ: drop one axis of the source weight. Axis 0 is
            # removed when it has size 1; otherwise index 0 of axis 1 is kept.
            if var_3d_2plus1d.shape[0] == 1:
                weight = var_3d_2plus1d[0]
            else:
                weight = var_3d_2plus1d[:, 0]
            if weight.shape[-1] != var_2plus1d.shape[-1]:
                # Transpose any depthwise kernels (conv3d --> depthwise_conv2d)
                weight = tf.transpose(weight, perm=(0, 1, 3, 2))
            weights.append(weight)
    model_2plus1d.set_weights(weights)

    if FLAGS.verify_output:
        inputs = tf.random.uniform([1, 6, 64, 64, 3], dtype=tf.float32)

        logits_2plus1d = model_2plus1d(inputs)
        logits_3d_2plus1d = model_3d_2plus1d(inputs)

        # Compare the mean *absolute* difference. A signed mean lets negative
        # differences (or positive and negative ones that cancel out) slip
        # under the threshold, so a bad conversion would go unnoticed.
        mean_abs_diff = tf.reduce_mean(
            tf.abs(logits_2plus1d - logits_3d_2plus1d))
        if mean_abs_diff > 1e-5:
            raise ValueError('Bad conversion, model outputs do not match.')

    save_checkpoint = tf.train.Checkpoint(model=model_2plus1d,
                                          backbone=backbone_2plus1d)
    save_checkpoint.save(FLAGS.output_checkpoint_path)
コード例 #2
0
    def test_movinet_classifier_stream_pos_enc(self):
        """Streaming logits match full-clip logits with positional encoding.

        The classifier is run once on the whole clip and once frame by frame
        (carrying the returned states forward); the output of the last frame
        is compared against the full-clip output.
        """
        tf.keras.backend.set_image_data_format('channels_last')

        classifier = movinet_model.MovinetClassifier(
            movinet.Movinet(
                model_id='a0',
                causal=True,
                use_external_states=True,
                use_positional_encoding=True,
            ),
            num_classes=600,
            output_states=True)

        clip = tf.ones([1, 8, 172, 172, 3])
        initial_states = classifier.init_states(tf.shape(clip))

        # Reference result: the whole clip in a single call.
        full_clip_logits, _ = classifier({**initial_states, 'image': clip})

        # Streaming result: one frame per call, threading the states through.
        running_states = initial_states
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_logits, running_states = classifier({
                **running_states, 'image': single_frame
            })

        self.assertEqual(streamed_logits.shape, full_clip_logits.shape)
        self.assertAllClose(
            streamed_logits, full_clip_logits, rtol=1e-5, atol=1e-5)
コード例 #3
0
    def test_movinet_classifier_mobile(self):
        """Streaming logits match full-clip logits with the mobile settings.

        Uses the '2plus1d' convolution, '2plus3d' squeeze-excitation and the
        hard_swish / hard_sigmoid activations, then compares frame-by-frame
        inference against a single full-clip call.
        """
        tf.keras.backend.set_image_data_format('channels_last')

        mobile_backbone = movinet.Movinet(
            model_id='a0',
            causal=True,
            use_external_states=True,
            conv_type='2plus1d',
            se_type='2plus3d',
            activation='hard_swish',
            gating_activation='hard_sigmoid')
        classifier = movinet_model.MovinetClassifier(
            mobile_backbone, num_classes=600, output_states=True)

        clip = tf.ones([1, 8, 172, 172, 3])
        initial_states = classifier.init_states(tf.shape(clip))

        # Reference result: the whole clip in a single call.
        full_clip_logits, _ = classifier({**initial_states, 'image': clip})

        # Streaming result: one frame per call, threading the states through.
        running_states = initial_states
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_logits, running_states = classifier({
                **running_states, 'image': single_frame
            })

        self.assertEqual(streamed_logits.shape, full_clip_logits.shape)
        self.assertAllClose(
            streamed_logits, full_clip_logits, rtol=1e-5, atol=1e-5)
コード例 #4
0
ファイル: movinet_test.py プロジェクト: vishalbelsare/models
  def test_movinet_stream(self):
    """Streaming backbone output matches the frame-averaged full-clip output.

    The backbone is run on the whole clip and then frame by frame; the 'head'
    endpoint after the last frame is compared against the full-clip 'head'
    endpoint averaged over axis 1.
    """
    tf.keras.backend.set_image_data_format('channels_last')

    network = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
    )
    clip = tf.ones([1, 5, 128, 128, 3])
    initial_states = network.init_states(tf.shape(clip))

    full_clip_endpoints, _ = network({**initial_states, 'image': clip})

    # Feed one frame per call, carrying the returned states forward.
    running_states = initial_states
    for single_frame in tf.split(clip, clip.shape[1], axis=1):
      streamed_endpoints, running_states = network({
          **running_states, 'image': single_frame
      })

    streamed_head = streamed_endpoints['head']

    # The expected final output is simply the mean across frames
    reference_head = tf.reduce_mean(
        full_clip_endpoints['head'], 1, keepdims=True)

    self.assertEqual(streamed_head.shape, reference_head.shape)
    self.assertAllClose(streamed_head, reference_head, rtol=1e-5, atol=1e-5)
コード例 #5
0
    def test_convert_model(self):
        """Runs the converter end to end and checks a checkpoint is written.

        A freshly built '3d_2plus1d' classifier is saved as the input
        checkpoint, the converter's flags are pointed at it, and after
        `convert_3d_2plus1d.main` runs the output checkpoint index file must
        exist.
        """
        work_dir = self.get_temp_dir()
        input_checkpoint_path = os.path.join(work_dir, 'ckpt-input')
        output_checkpoint_path = os.path.join(work_dir, 'ckpt')

        # Write the source checkpoint that the converter will read.
        source_backbone = movinet.Movinet(model_id='a0',
                                          conv_type='3d_2plus1d',
                                          se_type='2plus3d')
        source_model = movinet_model.MovinetClassifier(
            backbone=source_backbone, num_classes=600)
        source_model.build([1, 1, 1, 1, 3])
        tf.train.Checkpoint(model=source_model).save(input_checkpoint_path)

        # Point the converter at the checkpoint saved above.
        FLAGS.input_checkpoint_path = f'{input_checkpoint_path}-1'
        FLAGS.output_checkpoint_path = output_checkpoint_path
        FLAGS.model_id = 'a0'
        FLAGS.use_positional_encoding = False
        FLAGS.num_classes = 600
        FLAGS.verify_output = True

        convert_3d_2plus1d.main('unused_args')

        print(os.listdir(work_dir))

        expected_index_file = f'{output_checkpoint_path}-1.index'
        self.assertTrue(tf.io.gfile.exists(expected_index_file))
コード例 #6
0
    def test_network_with_states(self):
        """Checks endpoint shapes and states of a backbone with external states."""
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(
            model_id='a0',
            causal=True,
            use_external_states=True,
        )
        clip = tf.ones([1, 8, 128, 128, 3])

        init_states = network.init_states(tf.shape(clip))
        endpoints, new_states = network({**init_states, 'image': clip})

        # (endpoint name, expected output shape), checked in this order.
        expected_shapes = (
            ('stem', [1, 8, 64, 64, 8]),
            ('block0_layer0', [1, 8, 32, 32, 8]),
            ('block1_layer0', [1, 8, 16, 16, 32]),
            ('block2_layer0', [1, 8, 8, 8, 56]),
            ('block3_layer0', [1, 8, 8, 8, 56]),
            ('block4_layer0', [1, 8, 4, 4, 104]),
            ('head', [1, 1, 1, 1, 480]),
        )
        for endpoint_name, expected_shape in expected_shapes:
            self.assertAllEqual(endpoints[endpoint_name].shape,
                                expected_shape)

        self.assertNotEmpty(init_states)
        self.assertNotEmpty(new_states)
コード例 #7
0
    def test_movinet_models(self, model_id, expected_params_millions):
        """Checks the parameter count (in millions) of a causal classifier.

        Args:
          model_id: MoViNet model id passed to the backbone.
          expected_params_millions: Expected `count_params()` divided by 1e6.
        """
        tf.keras.backend.set_image_data_format('channels_last')

        causal_backbone = movinet.Movinet(model_id=model_id, causal=True)
        classifier = movinet_model.MovinetClassifier(
            backbone=causal_backbone, num_classes=600)
        classifier.build([1, 1, 1, 1, 3])

        self.assertEqual(classifier.count_params() / 1e6,
                         expected_params_millions)
コード例 #8
0
    def test_saved_model_save_load(self):
        """Checks a classifier survives a Keras SavedModel save/load round trip.

        The model is written to the per-test temporary directory instead of a
        fixed '/tmp/movinet/' path, so concurrent or repeated runs cannot
        collide with, or pick up, artifacts left behind by another run.
        """
        backbone = movinet.Movinet('a0')
        model = movinet_model.MovinetClassifier(backbone, num_classes=600)
        model.build([1, 5, 172, 172, 3])
        model.compile(metrics=['acc'])

        saved_model_dir = self.get_temp_dir()
        tf.keras.models.save_model(model, saved_model_dir)
        loaded_model = tf.keras.models.load_model(saved_model_dir)

        # The loaded model is called with a different input shape than the
        # one used for `build` above.
        output = loaded_model(dict(image=tf.ones([1, 1, 1, 1, 3])))

        self.assertAllEqual(output.shape, [1, 600])
コード例 #9
0
    def test_movinet_a0_2plus1d(self):
        """Checks '2plus1d' and '3d_2plus1d' models agree given equal weights.

        Weights of the '3d_2plus1d' model are copied into the '2plus1d' model
        (reshaped where the shapes differ) and both models are then expected
        to produce the same logits for the same input.
        """
        tf.keras.backend.set_image_data_format('channels_last')

        model_2plus1d = movinet_model.MovinetClassifier(
            backbone=movinet.Movinet(model_id='a0', conv_type='2plus1d'),
            num_classes=600)
        model_2plus1d.build([1, 1, 1, 1, 3])

        model_3d_2plus1d = movinet_model.MovinetClassifier(
            backbone=movinet.Movinet(model_id='a0', conv_type='3d_2plus1d'),
            num_classes=600)
        model_3d_2plus1d.build([1, 1, 1, 1, 3])

        # Ensure both models have the same weights
        converted_weights = []
        weight_pairs = zip(model_2plus1d.get_weights(),
                           model_3d_2plus1d.get_weights())
        for target_weight, source_weight in weight_pairs:
            if target_weight.shape == source_weight.shape:
                converted_weights.append(source_weight)
                continue

            # Shapes differ: drop axis 0 when it has size 1, otherwise keep
            # index 0 of axis 1.
            if source_weight.shape[0] == 1:
                reduced = source_weight[0]
            else:
                reduced = source_weight[:, 0]

            if reduced.shape[-1] != target_weight.shape[-1]:
                # Transpose any depthwise kernels (conv3d --> depthwise_conv2d)
                reduced = tf.transpose(reduced, perm=(0, 1, 3, 2))
            converted_weights.append(reduced)
        model_2plus1d.set_weights(converted_weights)

        clip = tf.ones([2, 8, 172, 172, 3], dtype=tf.float32)

        logits_2plus1d = model_2plus1d(clip)
        logits_3d_2plus1d = model_3d_2plus1d(clip)

        # Ensure both models have the same output, since the weights are the same
        self.assertAllEqual(logits_2plus1d.shape, logits_3d_2plus1d.shape)
        self.assertAllClose(
            logits_2plus1d, logits_3d_2plus1d, rtol=1e-5, atol=1e-5)
コード例 #10
0
    def test_serialize_deserialize(self):
        """Checks the classifier config round-trips through `from_config`."""
        original = movinet_model.MovinetClassifier(
            backbone=movinet.Movinet(model_id='a0'), num_classes=1000)

        # Rebuild a second classifier purely from the first one's config.
        original_config = original.get_config()
        rebuilt = movinet_model.MovinetClassifier.from_config(original_config)

        # Validate that the config can be forced to JSON.
        rebuilt.to_json()

        # If the serialization was successful, the new config should match the old.
        self.assertAllEqual(original.get_config(), rebuilt.get_config())
コード例 #11
0
ファイル: movinet_test.py プロジェクト: vishalbelsare/models
  def test_serialize_deserialize(self):
    """Checks the backbone config round-trips through `from_config`."""
    # Create a network object that sets all of its config options.
    original = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_positional_encoding=True,
        use_external_states=True,
    )

    # Create another network object from the first object's config.
    original_config = original.get_config()
    rebuilt = movinet.Movinet.from_config(original_config)

    # Validate that the config can be forced to JSON.
    rebuilt.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(original.get_config(), rebuilt.get_config())
コード例 #12
0
    def test_movinet_stream_nse(self):
        """Streaming backbone without an SE layer matches full-clip output.

        Also checks that, apart from the two head-pool entries, every key in
        the initial states contains 'stream_buffer'.
        """
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(
            model_id='a0',
            causal=True,
            use_external_states=True,
            se_type='none',
        )
        clip = tf.ones([1, 5, 128, 128, 3])
        init_states = network.init_states(tf.shape(clip))

        full_clip_endpoints, _ = network({**init_states, 'image': clip})

        # Feed one frame per call, carrying the returned states forward.
        running_states = init_states
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_endpoints, running_states = network({
                **running_states, 'image': single_frame
            })

        streamed_head = streamed_endpoints['head']

        # The expected final output is simply the mean across frames
        reference_head = tf.reduce_mean(
            full_clip_endpoints['head'], 1, keepdims=True)

        self.assertEqual(streamed_head.shape, reference_head.shape)
        self.assertAllClose(
            streamed_head, reference_head, rtol=1e-5, atol=1e-5)

        # Check contents in the states dictionary.
        head_pool_keys = ('state_head_pool_buffer',
                          'state_head_pool_frame_count')
        all_state_keys = list(init_states.keys())
        for head_pool_key in head_pool_keys:
            self.assertIn(head_pool_key, all_state_keys)

        # From now on, there are only 'stream_buffer' for the convolutions.
        conv_state_keys = [
            key for key in all_state_keys if key not in head_pool_keys
        ]
        for state_key in conv_state_keys:
            self.assertIn(
                'stream_buffer',
                state_key,
                msg=f'Expecting stream_buffer only, found {state_key}')
コード例 #13
0
ファイル: movinet_test.py プロジェクト: vishalbelsare/models
  def test_network_creation(self):
    """Checks endpoint shapes of a causal backbone built on a Keras input."""
    tf.keras.backend.set_image_data_format('channels_last')

    network = movinet.Movinet(
        model_id='a0',
        causal=True,
    )
    symbolic_clip = tf.keras.Input(shape=(8, 128, 128, 3), batch_size=1)
    endpoints, states = network(symbolic_clip)

    # (endpoint name, expected output shape), checked in this order.
    expected_shapes = (
        ('stem', [1, 8, 64, 64, 8]),
        ('block0_layer0', [1, 8, 32, 32, 8]),
        ('block1_layer0', [1, 8, 16, 16, 32]),
        ('block2_layer0', [1, 8, 8, 8, 56]),
        ('block3_layer0', [1, 8, 8, 8, 56]),
        ('block4_layer0', [1, 8, 4, 4, 104]),
        ('head', [1, 1, 1, 1, 480]),
    )
    for endpoint_name, expected_shape in expected_shapes:
      self.assertAllEqual(endpoints[endpoint_name].shape, expected_shape)

    self.assertNotEmpty(states)
コード例 #14
0
    def test_movinet_classifier_creation(self, is_training):
        """Checks a classifier yields [batch, num_classes] logits.

        Args:
          is_training: Value forwarded as the `training` argument of the call.
        """
        num_frames, frame_size, class_count = 16, 224, 1000
        tf.keras.backend.set_image_data_format('channels_last')

        clip_spec = tf.keras.layers.InputSpec(
            shape=[None, num_frames, frame_size, frame_size, 3])

        classifier = movinet_model.MovinetClassifier(
            backbone=movinet.Movinet(model_id='a0', input_specs=clip_spec),
            num_classes=class_count,
            input_specs={'image': clip_spec},
            dropout_rate=0.2)

        # Random clip with a batch of two.
        clip = np.random.rand(2, num_frames, frame_size, frame_size, 3)
        logits = classifier(clip, training=is_training)
        self.assertAllEqual([2, class_count], logits.shape)
コード例 #15
0
def build_and_export_saved_model(
        export_path: str = '/tmp/movinet/',
        model_id: str = 'a0',
        causal: bool = False,
        conv_type: str = '3d',
        se_type: str = '3d',
        activation: str = 'swish',
        classifier_activation: str = 'swish',
        gating_activation: str = 'sigmoid',
        use_positional_encoding: bool = False,
        num_classes: int = 600,
        input_shape: Optional[Tuple[int, int, int, int, int]] = None,
        bundle_input_init_states_fn: bool = True,
        checkpoint_path: Optional[str] = None) -> None:
    """Constructs a MoViNet classifier and hands it to `export_saved_model`.

    Args:
      export_path: Directory the saved model is written to.
      model_id: Name of the MoViNet variant.
      causal: Whether the model runs in causal mode. The same value is also
        used for the backbone's `use_external_states` and the classifier's
        `output_states`.
      conv_type: One of 3d, 2plus1d, or 3d_2plus1d. 3d selects the default 3D
        convolution. 2plus1d selects (2+1)D convolution built from Conv2D
        operations with 2D reshaping (e.g., a 5x3x3 kernel becomes 3x3
        followed by a 5x1 conv). 3d_2plus1d selects (2+1)D convolution built
        from Conv3D without 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3
        followed by a 5x1x1 conv).
      se_type: One of 3d, 2d, or 2plus3d. 3d selects the default 3D
        spatiotemporal global average pooling for squeeze excitation. 2d
        selects 2D spatial global average pooling on each frame. 2plus3d
        concatenates both the 3D and 2D global average pooling.
      activation: Main activation used across layers. 'swish' is replaced by
        'simple_swish' before the model is built.
      classifier_activation: Activation used by the classifier. 'swish' is
        replaced by 'simple_swish' before the model is built.
      gating_activation: Gating activation used in squeeze-excitation layers.
      use_positional_encoding: Whether to use positional encoding (only
        applied when causal=True).
      num_classes: Number of prediction classes.
      input_shape: 5D spatiotemporal input shape,
        [batch_size, num_frames, image_height, image_width, num_channels].
        Set the field, or a position within it, to None for dynamic input.
      bundle_input_init_states_fn: Whether to add init_states as a function
        signature to the saved model. Not necessary if the input shape is
        static (e.g., for TF Lite).
      checkpoint_path: Checkpoint to load. Leave blank for default
        initialization.
    """
    # Override swish activation implementation to remove custom gradients
    activation = 'simple_swish' if activation == 'swish' else activation
    classifier_activation = ('simple_swish' if classifier_activation == 'swish'
                             else classifier_activation)

    clip_spec = tf.keras.layers.InputSpec(shape=input_shape)

    backbone_kwargs = dict(
        model_id=model_id,
        causal=causal,
        use_positional_encoding=use_positional_encoding,
        conv_type=conv_type,
        se_type=se_type,
        input_specs=clip_spec,
        activation=activation,
        gating_activation=gating_activation,
        use_sync_bn=False,
        use_external_states=causal)
    classifier = movinet_model.MovinetClassifier(
        movinet.Movinet(**backbone_kwargs),
        num_classes=num_classes,
        output_states=causal,
        input_specs=dict(image=clip_spec),
        activation=classifier_activation)

    export_saved_model(
        model=classifier,
        input_shape=input_shape,
        export_path=export_path,
        causal=causal,
        bundle_input_init_states_fn=bundle_input_init_states_fn,
        checkpoint_path=checkpoint_path)
コード例 #16
0
def main(_) -> None:
    """Builds a MoViNet classifier from flags and exports it as a SavedModel.

    The model is built, compiled and, when `FLAGS.checkpoint_path` is set,
    restored from that checkpoint. In causal mode it is saved with explicit
    signatures (the predict function and, optionally, `init_states`);
    otherwise it is saved without custom signatures.

    Args:
      _: Unused positional argument.
    """
    input_specs = tf.keras.layers.InputSpec(shape=[
        FLAGS.batch_size,
        FLAGS.num_frames,
        FLAGS.image_size,
        FLAGS.image_size,
        3,
    ])

    # Use dimensions of 1 except the channels to export faster,
    # since we only really need the last dimension to build and get the output
    # states. These dimensions can be set to `None` once the model is built.
    build_shape = [dim if dim is not None else 1 for dim in input_specs.shape]

    # Override swish activation implementation to remove custom gradients
    activation = FLAGS.activation
    classifier_activation = FLAGS.classifier_activation
    if activation == 'swish':
        activation = 'simple_swish'
    if classifier_activation == 'swish':
        classifier_activation = 'simple_swish'

    backbone = movinet.Movinet(
        model_id=FLAGS.model_id,
        causal=FLAGS.causal,
        use_positional_encoding=FLAGS.use_positional_encoding,
        conv_type=FLAGS.conv_type,
        se_type=FLAGS.se_type,
        input_specs=input_specs,
        activation=activation,
        gating_activation=FLAGS.gating_activation,
        use_sync_bn=False,
        use_external_states=FLAGS.causal)
    model = movinet_model.MovinetClassifier(
        backbone,
        num_classes=FLAGS.num_classes,
        output_states=FLAGS.causal,
        input_specs=dict(image=input_specs),
        activation=classifier_activation)
    model.build(build_shape)

    # Compile model to generate some internal Keras variables.
    model.compile()

    if FLAGS.checkpoint_path:
        restore_status = tf.train.Checkpoint(model=model).restore(
            FLAGS.checkpoint_path)
        restore_status.assert_existing_objects_matched()

    if not FLAGS.causal:
        _ = model(tf.ones(build_shape))
        tf.keras.models.save_model(model, FLAGS.export_path)
    else:
        # Call the model once to get the output states. Call again with `states`
        # input to ensure that the inputs with the `states` argument is built
        # with the full output state shapes.
        dummy_image = tf.ones(build_shape)
        first_call_inputs = {
            **model.init_states(build_shape), 'image': dummy_image
        }
        _, output_states = model(first_call_inputs)
        _ = model({**output_states, 'image': dummy_image})

        # Create a function to explicitly set the names of the outputs
        def predict(inputs):
            logits, new_states = model(inputs)
            return {**new_states, 'logits': logits}

        # One named TensorSpec per initial state, plus one for the image.
        specs = {}
        state_specs = model.initial_state_specs(input_specs.shape)
        for name, spec in state_specs.items():
            specs[name] = tf.TensorSpec(
                spec.shape, name=name, dtype=spec.dtype)
        specs['image'] = tf.TensorSpec(
            input_specs.shape, dtype=model.dtype, name='image')

        predict_fn = tf.function(
            predict, jit_compile=True).get_concrete_function(specs)

        # `init_states` is traced with a length-5 int32 vector as its input.
        init_states_fn = tf.function(
            model.init_states, jit_compile=True).get_concrete_function(
                tf.TensorSpec([5], dtype=tf.int32))

        signatures = predict_fn
        if FLAGS.bundle_input_init_states_fn:
            signatures = {'call': predict_fn, 'init_states': init_states_fn}

        tf.keras.models.save_model(
            model, FLAGS.export_path, signatures=signatures)

    print(' ----- Done. Saved Model is saved at {}'.format(FLAGS.export_path))