コード例 #1
0
def main(_) -> None:
  """Converts a '3d_2plus1d' MoViNet checkpoint into a '2plus1d' checkpoint.

  Builds one classifier per `conv_type`, restores the '3d_2plus1d' model from
  `FLAGS.input_checkpoint_path`, copies its weights into the '2plus1d' model
  (slicing / transposing the kernels whose shapes differ), optionally verifies
  that both models produce matching logits, and saves the '2plus1d' model to
  `FLAGS.output_checkpoint_path`.

  Args:
    _: Unused positional command-line arguments.

  Raises:
    ValueError: If `FLAGS.verify_output` is set and the outputs of the two
      models differ by more than 1e-5 on average (in absolute value).
  """
  backbone_2plus1d = movinet.Movinet(
      model_id=FLAGS.model_id,
      causal=FLAGS.causal,
      conv_type='2plus1d',
      use_positional_encoding=FLAGS.use_positional_encoding)
  model_2plus1d = movinet_model.MovinetClassifier(
      backbone=backbone_2plus1d,
      num_classes=FLAGS.num_classes)
  model_2plus1d.build([1, 1, 1, 1, 3])

  backbone_3d_2plus1d = movinet.Movinet(
      model_id=FLAGS.model_id,
      causal=FLAGS.causal,
      conv_type='3d_2plus1d',
      use_positional_encoding=FLAGS.use_positional_encoding)
  model_3d_2plus1d = movinet_model.MovinetClassifier(
      backbone=backbone_3d_2plus1d,
      num_classes=FLAGS.num_classes)
  model_3d_2plus1d.build([1, 1, 1, 1, 3])

  checkpoint = tf.train.Checkpoint(model=model_3d_2plus1d)
  status = checkpoint.restore(FLAGS.input_checkpoint_path)
  status.assert_existing_objects_matched()

  # Ensure both models have the same weights
  weights = []
  for var_2plus1d, var_3d_2plus1d in zip(
      model_2plus1d.get_weights(), model_3d_2plus1d.get_weights()):
    if var_2plus1d.shape == var_3d_2plus1d.shape:
      weights.append(var_3d_2plus1d)
    else:
      # Drop whichever of the two leading axes the 2plus1d layout lacks:
      # axis 0 when it has size 1, otherwise axis 1.
      if var_3d_2plus1d.shape[0] == 1:
        weight = var_3d_2plus1d[0]
      else:
        weight = var_3d_2plus1d[:, 0]
      if weight.shape[-1] != var_2plus1d.shape[-1]:
        # Transpose any depthwise kernels (conv3d --> depthwise_conv2d)
        weight = tf.transpose(weight, perm=(0, 1, 3, 2))
      weights.append(weight)
  model_2plus1d.set_weights(weights)

  if FLAGS.verify_output:
    inputs = tf.random.uniform([1, 6, 64, 64, 3], dtype=tf.float32)

    logits_2plus1d = model_2plus1d(inputs)
    logits_3d_2plus1d = model_3d_2plus1d(inputs)

    # Compare magnitudes: with a signed mean, positive and negative errors
    # cancel out and a negative mean difference would never be reported.
    mean_abs_error = tf.reduce_mean(
        tf.abs(logits_2plus1d - logits_3d_2plus1d))
    if mean_abs_error > 1e-5:
      raise ValueError('Bad conversion, model outputs do not match.')

  save_checkpoint = tf.train.Checkpoint(
      model=model_2plus1d, backbone=backbone_2plus1d)
  save_checkpoint.save(FLAGS.output_checkpoint_path)
コード例 #2
0
    def test_movinet_stream(self):
        """Check frame-by-frame streaming against one full-clip forward pass."""
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(
            model_id='a0', causal=True, use_external_states=True)
        clip = tf.ones([1, 5, 128, 128, 3])

        initial_states = network.init_states(tf.shape(clip))
        full_clip_endpoints, _ = network({**initial_states, 'image': clip})

        # Feed one frame per call, threading the returned states through.
        current_states = initial_states
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_endpoints, current_states = network(
                {**current_states, 'image': single_frame})

        streamed_head = streamed_endpoints['head']

        # The reference is the full-clip head output averaged over axis 1.
        reference_head = tf.reduce_mean(
            full_clip_endpoints['head'], 1, keepdims=True)

        self.assertEqual(streamed_head.shape, reference_head.shape)
        self.assertAllClose(streamed_head, reference_head, 1e-5, 1e-5)
コード例 #3
0
    def test_network_with_states(self):
        """Check endpoint shapes of a causal 'a0' backbone fed external states."""
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(
            model_id='a0', causal=True, use_external_states=True)
        clip = tf.ones([1, 8, 128, 128, 3])

        initial_states = network.init_states(tf.shape(clip))
        endpoints, updated_states = network({**initial_states, 'image': clip})

        # (endpoint name, expected output shape), checked in this order.
        expected_shapes = (
            ('stem', [1, 8, 64, 64, 8]),
            ('block0_layer0', [1, 8, 32, 32, 8]),
            ('block1_layer0', [1, 8, 16, 16, 32]),
            ('block2_layer0', [1, 8, 8, 8, 56]),
            ('block3_layer0', [1, 8, 8, 8, 56]),
            ('block4_layer0', [1, 8, 4, 4, 104]),
            ('head', [1, 1, 1, 1, 480]),
        )
        for endpoint_name, shape in expected_shapes:
            self.assertAllEqual(endpoints[endpoint_name].shape, shape)

        self.assertNotEmpty(initial_states)
        self.assertNotEmpty(updated_states)
コード例 #4
0
    def test_movinet_classifier_stream(self):
        """Check per-frame streaming with a `states` dict against a full pass."""
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(model_id='a0', causal=True)
        clip = tf.ones([1, 5, 128, 128, 3])

        full_clip_endpoints, _ = network({'image': clip, 'states': {}})

        # Start from empty states and pass the returned states to the next call.
        streamed_endpoints, carried_states = None, {}
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_endpoints, carried_states = network(
                {'image': single_frame, 'states': carried_states})

        streamed_head = streamed_endpoints['head']

        # The reference is the full-clip head output averaged over axis 1.
        reference_head = tf.reduce_mean(
            full_clip_endpoints['head'], 1, keepdims=True)

        self.assertEqual(streamed_head.shape, reference_head.shape)
        self.assertAllClose(streamed_head, reference_head, 1e-5, 1e-5)
コード例 #5
0
  def test_movinet_classifier_mobile(self):
    """Check streaming equals a full pass with the mobile configuration."""
    tf.keras.backend.set_image_data_format('channels_last')

    mobile_options = {
        'conv_type': '2plus1d',
        'se_type': '2plus3d',
        'activation': 'hard_swish',
        'gating_activation': 'hard_sigmoid',
    }
    network = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
        **mobile_options)
    classifier = movinet_model.MovinetClassifier(
        network, num_classes=600, output_states=True)

    clip = tf.ones([1, 8, 172, 172, 3])

    initial_states = classifier.init_states(tf.shape(clip))
    full_clip_logits, _ = classifier({**initial_states, 'image': clip})

    # Feed one frame per call, threading the returned states through.
    current_states = initial_states
    for single_frame in tf.split(clip, clip.shape[1], axis=1):
      streamed_logits, current_states = classifier(
          {**current_states, 'image': single_frame})

    self.assertEqual(streamed_logits.shape, full_clip_logits.shape)
    self.assertAllClose(streamed_logits, full_clip_logits, 1e-5, 1e-5)
コード例 #6
0
    def test_movinet_classifier_stream_pos_enc(self):
        """Check streaming equals a full pass when positional encoding is on."""
        tf.keras.backend.set_image_data_format('channels_last')

        network = movinet.Movinet(
            model_id='a0',
            causal=True,
            use_external_states=True,
            use_positional_encoding=True,
        )
        classifier = movinet_model.MovinetClassifier(
            network, num_classes=600, output_states=True)

        clip = tf.ones([1, 8, 172, 172, 3])

        initial_states = classifier.init_states(tf.shape(clip))
        full_clip_logits, _ = classifier({**initial_states, 'image': clip})

        # Feed one frame per call, threading the returned states through.
        current_states = initial_states
        for single_frame in tf.split(clip, clip.shape[1], axis=1):
            streamed_logits, current_states = classifier(
                {**current_states, 'image': single_frame})

        self.assertEqual(streamed_logits.shape, full_clip_logits.shape)
        self.assertAllClose(streamed_logits, full_clip_logits, 1e-5, 1e-5)
コード例 #7
0
    def test_convert_model(self):
        """Run the conversion script end to end and check its output exists."""
        work_dir = self.get_temp_dir()
        source_prefix = os.path.join(work_dir, 'ckpt-input')
        target_prefix = os.path.join(work_dir, 'ckpt')

        # Write a '3d_2plus1d' checkpoint for the script to consume.
        source_backbone = movinet.Movinet(
            model_id='a0', conv_type='3d_2plus1d')
        source_model = movinet_model.MovinetClassifier(
            backbone=source_backbone, num_classes=600)
        source_model.build([1, 1, 1, 1, 3])
        source_checkpoint = tf.train.Checkpoint(model=source_model)
        source_checkpoint.save(source_prefix)

        # The script is configured through the global flags; the checkpoint
        # written above is expected at '<prefix>-1'.
        FLAGS.input_checkpoint_path = f'{source_prefix}-1'
        FLAGS.output_checkpoint_path = target_prefix
        FLAGS.model_id = 'a0'
        FLAGS.use_positional_encoding = False
        FLAGS.num_classes = 600
        FLAGS.verify_output = True

        convert_3d_2plus1d.main('unused_args')

        print(os.listdir(work_dir))

        converted_index = f'{target_prefix}-1.index'
        self.assertTrue(tf.io.gfile.exists(converted_index))
コード例 #8
0
  def test_movinet_a0_2plus1d(self):
    """Check '2plus1d' matches '3d_2plus1d' once the weights are copied."""
    tf.keras.backend.set_image_data_format('channels_last')

    def build_classifier(conv_type):
      """Returns a built 'a0' classifier using the given `conv_type`."""
      classifier = movinet_model.MovinetClassifier(
          backbone=movinet.Movinet(model_id='a0', conv_type=conv_type),
          num_classes=600)
      classifier.build([1, 1, 1, 1, 3])
      return classifier

    model_2plus1d = build_classifier('2plus1d')
    model_3d_2plus1d = build_classifier('3d_2plus1d')

    # Copy the 3d_2plus1d weights into the 2plus1d model, reshaping the ones
    # whose shapes differ.
    converted = []
    target_weights = model_2plus1d.get_weights()
    source_weights = model_3d_2plus1d.get_weights()
    for target, source in zip(target_weights, source_weights):
      if target.shape == source.shape:
        converted.append(source)
        continue
      # Drop axis 0 when it has size 1, otherwise drop axis 1.
      squeezed = source[0] if source.shape[0] == 1 else source[:, 0]
      if squeezed.shape[-1] != target.shape[-1]:
        # Transpose any depthwise kernels (conv3d --> depthwise_conv2d)
        squeezed = tf.transpose(squeezed, perm=(0, 1, 3, 2))
      converted.append(squeezed)
    model_2plus1d.set_weights(converted)

    clip = tf.ones([2, 8, 172, 172, 3], dtype=tf.float32)

    logits_2plus1d = model_2plus1d(clip)
    logits_3d_2plus1d = model_3d_2plus1d(clip)

    # Identical weights must yield identical outputs.
    self.assertAllEqual(logits_2plus1d.shape, logits_3d_2plus1d.shape)
    self.assertAllClose(logits_2plus1d, logits_3d_2plus1d, 1e-5, 1e-5)
コード例 #9
0
    def test_movinet_models(self, model_id, expected_params_millions):
        """Check the parameter count (in millions) of a causal classifier."""
        tf.keras.backend.set_image_data_format('channels_last')

        backbone = movinet.Movinet(model_id=model_id, causal=True)
        classifier = movinet_model.MovinetClassifier(
            backbone=backbone, num_classes=600)
        classifier.build([1, 1, 1, 1, 3])

        actual_params_millions = classifier.count_params() / 1e6
        self.assertEqual(actual_params_millions, expected_params_millions)
コード例 #10
0
    def test_saved_model_save_load(self):
        """Round-trip a classifier through `save_model` / `load_model`.

        The model is exported under this test's temporary directory rather
        than a fixed `/tmp` path, so concurrent or repeated runs cannot
        collide and the test does not depend on a POSIX `/tmp`.
        """
        # Local import: the file's import block is not visible from here.
        import os

        backbone = movinet.Movinet('a0')
        model = movinet_model.MovinetClassifier(backbone, num_classes=600)
        model.build([1, 5, 172, 172, 3])
        model.compile(metrics=['acc'])

        export_dir = os.path.join(self.get_temp_dir(), 'movinet')
        tf.keras.models.save_model(model, export_dir)
        loaded_model = tf.keras.models.load_model(export_dir)

        # The reloaded model is called with a different input size than the
        # one it was built with.
        output = loaded_model(dict(image=tf.ones([1, 1, 1, 1, 3])))

        self.assertAllEqual(output.shape, [1, 600])
コード例 #11
0
  def test_serialize_deserialize(self):
    """Round-trip the classifier through `get_config` / `from_config`."""
    original = movinet_model.MovinetClassifier(
        backbone=movinet.Movinet(model_id='a0'), num_classes=1000)

    original_config = original.get_config()
    restored = movinet_model.MovinetClassifier.from_config(original_config)

    # Serializing the rebuilt model to JSON must not raise.
    restored.to_json()

    # A faithful round trip leaves the configuration unchanged.
    self.assertAllEqual(original.get_config(), restored.get_config())
コード例 #12
0
ファイル: movinet_test.py プロジェクト: zlsh80826/models
    def test_serialize_deserialize(self):
        """Round-trip the backbone through `get_config` / `from_config`."""
        # Build a backbone with several non-default options set.
        original = movinet.Movinet(
            model_id='a0',
            causal=True,
            use_positional_encoding=True,
        )

        # Rebuild a second backbone purely from the first one's config.
        restored = movinet.Movinet.from_config(original.get_config())

        # Serializing the rebuilt backbone to JSON must not raise.
        _ = restored.to_json()

        # A faithful round trip leaves the configuration unchanged.
        self.assertAllEqual(original.get_config(), restored.get_config())
コード例 #13
0
    def test_network_creation(self):
        """Check endpoint shapes of a causal 'a0' backbone on a Keras input."""
        tf.keras.backend.set_image_data_format('channels_last')

        backbone = movinet.Movinet(model_id='a0', causal=True)
        symbolic_clip = tf.keras.Input(shape=(8, 128, 128, 3), batch_size=1)
        endpoints, states = backbone(symbolic_clip)

        # (endpoint name, expected output shape), checked in this order.
        expected_shapes = (
            ('stem', [1, 8, 64, 64, 8]),
            ('b0/l0', [1, 8, 32, 32, 8]),
            ('b1/l0', [1, 8, 16, 16, 32]),
            ('b2/l0', [1, 8, 8, 8, 56]),
            ('b3/l0', [1, 8, 8, 8, 56]),
            ('b4/l0', [1, 8, 4, 4, 104]),
            ('head', [1, 1, 1, 1, 480]),
        )
        for endpoint_name, shape in expected_shapes:
            self.assertAllEqual(endpoints[endpoint_name].shape, shape)

        self.assertNotEmpty(states)
コード例 #14
0
  def test_movinet_classifier_creation(self, is_training):
    """Build a classifier with explicit input specs and check logits shape."""
    num_frames, frame_size = 16, 224
    tf.keras.backend.set_image_data_format('channels_last')

    clip_spec = tf.keras.layers.InputSpec(
        shape=[None, num_frames, frame_size, frame_size, 3])
    backbone = movinet.Movinet(model_id='a0', input_specs=clip_spec)

    class_count = 1000
    classifier = movinet_model.MovinetClassifier(
        backbone=backbone,
        num_classes=class_count,
        input_specs={'image': clip_spec},
        dropout_rate=0.2)

    # A batch of two random clips matching the declared input spec.
    clips = np.random.rand(2, num_frames, frame_size, frame_size, 3)
    logits = classifier(clips, training=is_training)
    self.assertAllEqual([2, class_count], logits.shape)
コード例 #15
0
ファイル: movinet_test.py プロジェクト: zlsh80826/models
    def test_network_with_states(self):
        """Check endpoint shapes when states from a first call are fed back."""
        tf.keras.backend.set_image_data_format('channels_last')

        backbone = movinet.Movinet(model_id='a0', causal=True)
        clip = tf.ones([1, 8, 128, 128, 3])

        # The first call produces the states; the second call consumes them.
        _, states = backbone(clip)
        endpoints, new_states = backbone({'image': clip, 'states': states})

        # (endpoint name, expected output shape), checked in this order.
        expected_shapes = (
            ('stem', [1, 8, 64, 64, 8]),
            ('b0/l0', [1, 8, 32, 32, 8]),
            ('b1/l0', [1, 8, 16, 16, 32]),
            ('b2/l0', [1, 8, 8, 8, 56]),
            ('b3/l0', [1, 8, 8, 8, 56]),
            ('b4/l0', [1, 8, 4, 4, 104]),
            ('head', [1, 1, 1, 1, 480]),
        )
        for endpoint_name, shape in expected_shapes:
            self.assertAllEqual(endpoints[endpoint_name].shape, shape)

        self.assertNotEmpty(states)
        self.assertNotEmpty(new_states)
コード例 #16
0
def main(argv: Sequence[str]) -> None:
    """Build a MoViNet classifier from flags and export it as a SavedModel.

    A causal model is wrapped in a `tf.Module` exposing `__call__`, `base`
    and `stream` functions and saved with `tf.saved_model.save`; a non-causal
    model is saved directly with `tf.keras.models.save_model`.

    Args:
        argv: Command-line arguments; at most one element is accepted.

    Raises:
        app.UsageError: If more than one command-line argument is given.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Every dimension except the channels is 1 so that building is fast; only
    # the last dimension is needed to build the model and obtain the output
    # states.
    dummy_shape = [1, 1, 1, 1, 3]

    network = movinet.Movinet(
        FLAGS.model_id, causal=FLAGS.causal, conv_type=FLAGS.conv_type)
    model = movinet_model.MovinetClassifier(
        network, num_classes=FLAGS.num_classes, output_states=FLAGS.causal)
    model.build(dummy_shape)

    if FLAGS.checkpoint_path:
        model.load_weights(FLAGS.checkpoint_path)

    if not FLAGS.causal:
        _ = model(tf.ones(dummy_shape))
        tf.keras.models.save_model(model, FLAGS.output_path)
    else:
        # The first call yields the output states; the second call, fed with
        # those states, makes sure the stateful input path is built as well.
        _, states = model({'image': tf.ones(dummy_shape), 'states': {}})
        _, states = model({'image': tf.ones(dummy_shape), 'states': states})

        input_spec = tf.TensorSpec(
            shape=[None, None, None, None, 3], dtype=tf.float32, name='inputs')

        def _state_spec(name, state):
            """Spec for one state; rank-5 states keep only dims 1 and -1."""
            dims = state.shape
            if len(dims) == 5:
                dims = [None, dims[1], None, None, dims[-1]]
            return tf.TensorSpec(shape=dims, dtype=state.dtype, name=name)

        state_specs = {
            name: _state_spec(name, state) for name, state in states.items()
        }

        specs = (input_spec, state_specs)

        # The module is declared inline because the state shapes must be
        # known before they can be used as a `tf.function` input signature.
        class ExportStateModule(tf.Module):
            """Module with state for exporting to saved_model."""

            def __init__(self, model):
                self.model = model

            @tf.function(input_signature=[input_spec])
            def __call__(self, inputs):
                return self.model({'image': inputs, 'states': {}})

            @tf.function(input_signature=[input_spec])
            def base(self, inputs):
                return self.model({'image': inputs, 'states': {}})

            @tf.function(input_signature=specs)
            def stream(self, inputs, states):
                return self.model({'image': inputs, 'states': states})

        tf.saved_model.save(ExportStateModule(model), FLAGS.output_path)

    print(' ----- Done. Saved Model is saved at {}'.format(FLAGS.output_path))
コード例 #17
0
def main(_) -> None:
    """Build a MoViNet classifier from flags and export it as a SavedModel.

    A causal model is exported with two explicit signatures, `call` and
    `init_states`; a non-causal model is exported with the Keras defaults.

    Args:
        _: Unused positional command-line arguments.
    """
    input_specs = tf.keras.layers.InputSpec(shape=[
        FLAGS.batch_size,
        FLAGS.num_frames,
        FLAGS.image_size,
        FLAGS.image_size,
        3,
    ])

    # Any dimension left unspecified (None) is replaced by 1 for building and
    # for the dummy forward passes below.
    input_shape = [dim if dim is not None else 1 for dim in input_specs.shape]

    network = movinet.Movinet(
        FLAGS.model_id,
        causal=FLAGS.causal,
        conv_type=FLAGS.conv_type,
        use_external_states=FLAGS.causal,
        input_specs=input_specs,
        activation=FLAGS.activation,
        gating_activation=FLAGS.gating_activation,
        se_type=FLAGS.se_type,
        use_positional_encoding=FLAGS.use_positional_encoding)
    model = movinet_model.MovinetClassifier(
        network,
        num_classes=FLAGS.num_classes,
        output_states=FLAGS.causal,
        input_specs={'image': input_specs})
    model.build(input_shape)

    # Compile model to generate some internal Keras variables.
    model.compile()

    if FLAGS.checkpoint_path:
        restorer = tf.train.Checkpoint(model=model)
        restore_status = restorer.restore(FLAGS.checkpoint_path)
        restore_status.assert_existing_objects_matched()

    if not FLAGS.causal:
        _ = model(tf.ones(input_shape))
        tf.keras.models.save_model(model, FLAGS.export_path)
    else:
        # The first call yields the output states; the second call, fed with
        # those states, builds the model with the full output state shapes.
        dummy_clip = tf.ones(input_shape)
        warmup_inputs = {**model.init_states(input_shape), 'image': dummy_clip}
        _, states = model(warmup_inputs)
        _, states = model({**states, 'image': dummy_clip})

        # Wrapper that gives every output an explicit name.
        def predict(inputs):
            logits, output_states = model(inputs)
            return {**output_states, 'logits': logits}

        # One named spec per initial state, plus the image input itself.
        specs = {}
        state_specs = model.initial_state_specs(input_specs.shape)
        for name, spec in state_specs.items():
            specs[name] = tf.TensorSpec(
                spec.shape, name=name, dtype=spec.dtype)
        specs['image'] = tf.TensorSpec(
            input_specs.shape, dtype=model.dtype, name='image')

        predict_fn = tf.function(predict, jit_compile=True)
        predict_fn = predict_fn.get_concrete_function(specs)

        # `init_states` is traced for a length-5 int32 vector argument.
        init_states_fn = tf.function(model.init_states, jit_compile=True)
        init_states_fn = init_states_fn.get_concrete_function(
            tf.TensorSpec([5], dtype=tf.int32))

        tf.keras.models.save_model(
            model,
            FLAGS.export_path,
            signatures={'call': predict_fn, 'init_states': init_states_fn})

    print(' ----- Done. Saved Model is saved at {}'.format(FLAGS.export_path))