def create_lstm_cell(self, batch_size, output_size, state_saver,
                     state_name):
    """Build the bottleneck ConvLSTM cell together with its starting state.

    Args:
      batch_size: batch size used when a fresh initial state is created.
      output_size: output size of the lstm cell, [width, height].
      state_saver: a state saver object with methods `state` and
        `save_state`, or None to start from the cell's own initial state.
      state_name: string, the name prefix used with the state_saver.

    Returns:
      lstm_cell: the lstm cell unit.
      init_state: initial state representations; a `(c, h)` tuple when the
        state is read from `state_saver`, otherwise whatever
        `lstm_cell.init_state` returns.
      step: the step read from `state_saver`, or None when no saver is given.
    """
    # The state depth never drops below the configured minimum depth.
    depth = max(self._min_depth, self._lstm_state_depth)
    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
        filter_size=(3, 3),
        output_size=output_size,
        num_units=depth,
        activation=tf.nn.relu6,
        visualize_gates=False)

    if state_saver is not None:
        # Read order is step, c, h.
        step, cell_state, hidden_state = [
            state_saver.state(state_name + suffix)
            for suffix in ('_step', '_c', '_h')
        ]
        return lstm_cell, (cell_state, hidden_state), step

    fresh_state = lstm_cell.init_state(state_name, batch_size, tf.float32)
    return lstm_cell, fresh_state, None
コード例 #2
0
    def test_flatten_state(self):
        """Checks result shapes for a cell built with flatten_state=True.

        A pre-bottleneck cell is unrolled for ten steps, alternating between
        a 5-channel and a 3-channel input. After running the graph the output
        must be [4, 10, 10, 15] and both state entries must be [4, 1500].
        """
        batch_size = 4
        output_size = [10, 10]
        num_units = 15
        unroll = 10

        inputs_large = tf.zeros([batch_size, 10, 10, 5], dtype=tf.float32)
        inputs_small = tf.zeros([batch_size, 10, 10, 3], dtype=tf.float32)
        cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=[3, 3],
            output_size=output_size,
            num_units=num_units,
            pre_bottleneck=True,
            flatten_state=True)
        # Fourth positional argument is the learned_state flag (off here).
        state = cell.init_state('lstm_state', batch_size, tf.float32, False)

        for step in range(unroll):
            # Even steps use the large input with index 0, odd steps use the
            # small input with index 1.
            parity = step % 2
            frame = inputs_small if parity else inputs_large
            inputs = cell.pre_bottleneck(frame, state[1], parity)
            output, state = cell(inputs, state)

        expected_output_shape = tuple([batch_size] + output_size + [num_units])
        expected_state_shape = (batch_size,
                                output_size[0] * output_size[1] * num_units)
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            output_result, state_result = sess.run([output, state])
            self.assertAllEqual(expected_output_shape, output_result.shape)
            for idx in (0, 1):
                self.assertAllEqual(expected_state_shape,
                                    state_result[idx].shape)
コード例 #3
0
    def test_prebottleneck(self):
        """Checks static shapes when inputs go through pre_bottleneck first.

        The cell is unrolled for ten steps, alternating between a 5-channel
        and a 3-channel input; the output and both state tensors must keep
        the static shape [4, 10, 10, 15].
        """
        batch_size = 4
        output_size = [10, 10]
        num_units = 15
        unroll = 10

        inputs_large = tf.zeros([batch_size, 10, 10, 5], dtype=tf.float32)
        inputs_small = tf.zeros([batch_size, 10, 10, 3], dtype=tf.float32)
        cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=[3, 3],
            output_size=output_size,
            num_units=num_units,
            pre_bottleneck=True)
        # Fourth positional argument is the learned_state flag (off here).
        state = cell.init_state('lstm_state', batch_size, tf.float32, False)

        for step in range(unroll):
            # Even steps use the large input with index 0, odd steps use the
            # small input with index 1.
            parity = step % 2
            frame = inputs_small if parity else inputs_large
            inputs = cell.pre_bottleneck(frame, state[1], parity)
            output, state = cell(inputs, state)

        expected_shape = [batch_size] + output_size + [num_units]
        for tensor in (output, state[0], state[1]):
            self.assertAllEqual(expected_shape, tensor.shape.as_list())
コード例 #4
0
    def test_run_lstm_cell(self):
        """Checks static shapes after a single call of the ConvLSTM cell.

        One step on a [4, 10, 10, 3] zero input must yield an output and two
        state tensors of static shape [4, 10, 10, 15].
        """
        batch_size = 4
        output_size = [10, 10]
        num_units = 15

        inputs = tf.zeros([batch_size, 10, 10, 3], dtype=tf.float32)
        cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=[3, 3],
            output_size=output_size,
            num_units=num_units)
        # Fourth positional argument is the learned_state flag (off here).
        init_state = cell.init_state('lstm_state', batch_size, tf.float32,
                                     False)
        output, state_tuple = cell(inputs, init_state)

        expected_shape = [batch_size] + output_size + [num_units]
        for tensor in (output, state_tuple[0], state_tuple[1]):
            self.assertAllEqual(expected_shape, tensor.shape.as_list())
コード例 #5
0
    def test_get_init_learned_state(self):
        """Checks dtype and static shape of the state with learned_state=True.

        Both tensors returned by `init_state` must be float32 with static
        shape [4, 10, 10, 15].
        """
        batch_size = 4
        output_size = [10, 10]
        num_units = 15

        cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=[3, 3],
            output_size=output_size,
            num_units=num_units)
        # Fourth positional argument is the learned_state flag (on here).
        init_c, init_h = cell.init_state('lstm_state', batch_size,
                                         tf.float32, True)

        initial_tensors = (init_c, init_h)
        for tensor in initial_tensors:
            self.assertEqual(tf.float32, tensor.dtype)

        expected_shape = [batch_size] + output_size + [num_units]
        for tensor in initial_tensors:
            self.assertAllEqual(expected_shape, tensor.shape.as_list())
コード例 #6
0
    def test_get_init_state(self):
        """Checks that the non-learned initial state is float32 and all zeros.

        Both tensors returned by `init_state` are evaluated in a session and
        compared against a zero array of shape (4, 10, 10, 15).
        """
        batch_size = 4
        output_dim = 10
        output_size = [output_dim, output_dim]
        num_units = 15

        cell = lstm_cells.BottleneckConvLSTMCell(
            filter_size=[3, 3],
            output_size=output_size,
            num_units=num_units)
        # Fourth positional argument is the learned_state flag (off here).
        init_c, init_h = cell.init_state('lstm_state', batch_size,
                                         tf.float32, False)

        for tensor in (init_c, init_h):
            self.assertEqual(tf.float32, tensor.dtype)

        with self.test_session() as sess:
            evaluated = sess.run([init_c, init_h])
            init_c_res, init_h_res = evaluated
            expected_zeros = np.zeros(
                (batch_size, output_dim, output_dim, num_units))
            self.assertAllClose(expected_zeros, init_c_res)
            self.assertAllClose(expected_zeros, init_h_res)
    def extract_features(self,
                         preprocessed_inputs,
                         state_saver=None,
                         state_name='lstm_state',
                         unroll_length=5,
                         scope=None):
        """Extracts features from preprocessed inputs.

        The features include the base network features, lstm features and SSD
        features, organized in the following name scope:

        <parent scope>/MobilenetV1/...
        <parent scope>/LSTM/...
        <parent scope>/FeatureMaps/...

        Besides returning the feature maps, this method stores the per-step
        LSTM states on `self._states_out` and, when a `state_saver` is given,
        the step tensor read from it on `self._step`.

        Args:
          preprocessed_inputs: A [batch, height, width, channels] float tensor
            representing a batch of consecutive frames from video clips. The
            base network output is split into `unroll_length` pieces along
            its first dimension, so that dimension is expected to be a
            multiple of `unroll_length`.
          state_saver: A state saver object with methods `state` and
            `save_state`. When None, the LSTM starts from the cell's own
            initial state.
          state_name: A python string for the name to use with the
            state_saver.
          unroll_length: The number of steps to unroll the lstm.
          scope: The scope for the base network of the feature extractor.

        Returns:
          A list of tensors where the ith tensor has shape [batch, height_i,
          width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._is_training)):
            # Only override the base network hyperparams when requested;
            # otherwise enter a no-op context.
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    # Base network.
                    with tf.variable_scope(scope,
                                           self._base_network_scope,
                                           reuse=self._reuse_weights) as scope:
                        net, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with slim.arg_scope([slim.batch_norm],
                                fused=False,
                                is_training=self._is_training):
                # ConvLSTM layers.
                with tf.variable_scope(
                        'LSTM', reuse=self._reuse_weights) as lstm_scope:
                    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
                        filter_size=(3, 3),
                        output_size=(net.shape[1].value, net.shape[2].value),
                        num_units=max(self._min_depth, self._lstm_state_depth),
                        activation=tf.nn.relu6,
                        visualize_gates=True)

                    # One list entry per unrolled step.
                    net_seq = list(tf.split(net, unroll_length))
                    if state_saver is None:
                        # Floor division keeps the per-step batch size an
                        # integer under Python 3, where `/` would produce a
                        # float.
                        init_state = lstm_cell.init_state(
                            state_name, net.shape[0].value // unroll_length,
                            tf.float32)
                    else:
                        c = state_saver.state('%s_c' % state_name)
                        h = state_saver.state('%s_h' % state_name)
                        init_state = (c, h)

                    # Identities added for inputing state tensors externally.
                    c_ident = tf.identity(init_state[0],
                                          name='lstm_state_in_c')
                    h_ident = tf.identity(init_state[1],
                                          name='lstm_state_in_h')
                    init_state = (c_ident, h_ident)

                    net_seq, states_out = rnn_decoder.rnn_decoder(
                        net_seq, init_state, lstm_cell, scope=lstm_scope)
                    batcher_ops = None
                    self._states_out = states_out
                    if state_saver is not None:
                        self._step = state_saver.state('%s_step' % state_name)
                        # NOTE(review): the saved step is decremented here;
                        # confirm that a countdown (rather than `+ 1`) is
                        # what the state saver expects.
                        batcher_ops = [
                            state_saver.save_state('%s_c' % state_name,
                                                   states_out[-1][0]),
                            state_saver.save_state('%s_h' % state_name,
                                                   states_out[-1][1]),
                            state_saver.save_state('%s_step' % state_name,
                                                   self._step - 1)
                        ]
                    # Tie the state-saving ops (if any) to the LSTM feature
                    # so they run whenever the feature is computed.
                    with tf_ops.control_dependencies(batcher_ops):
                        image_features['Conv2d_13_pointwise_lstm'] = tf.concat(
                            net_seq, 0)

                    # Identities added for reading output states, to be reused
                    # externally.
                    tf.identity(states_out[-1][0], name='lstm_state_out_c')
                    tf.identity(states_out[-1][1], name='lstm_state_out_h')

                # SSD layers.
                with tf.variable_scope('FeatureMaps',
                                       reuse=self._reuse_weights):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=self._feature_map_layout,
                        depth_multiplier=(self._depth_multiplier),
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        # Materialize as a real list: under Python 3 `.values()` is only a
        # view, which does not support indexing.
        return list(feature_maps.values())