def __init__(self, label_count, apply_quantization, **kwargs):
        """Builds the layer stack for a small quantization-aware conv model.

        Args:
          label_count: number of output classes for the final Dense layer.
          apply_quantization: whether to wrap layers for quantization-aware
            training (forwarded to quantize.quantize_layer for some layers).
          **kwargs: forwarded to the base-class constructor.
        """
        super(ConvModel, self).__init__(**kwargs)

        # create layers
        # 8-bit all-values quantizer applied to the model input.
        # NOTE(review): created unconditionally — it does not honor
        # apply_quantization, unlike the layers below; confirm intended.
        self.input_quant = quantize_layer.QuantizeLayer(
            AllValuesQuantizer(num_bits=8,
                               per_axis=False,
                               symmetric=False,
                               narrow_range=False))

        # First conv block: Conv2D -> BatchNorm -> ReLU.
        # NOTE(review): these three calls do not pass apply_quantization,
        # so they rely on quantize.quantize_layer's default — verify that
        # matches the conv2/flatten/dense calls below, which do pass it.
        self.conv1 = quantize.quantize_layer(
            tf.keras.layers.Conv2D(filters=2,
                                   kernel_size=[1, 3],
                                   padding='SAME'))
        self.bn1 = quantize.quantize_layer(
            tf.keras.layers.BatchNormalization())
        self.relu1 = quantize.quantize_layer(tf.keras.layers.ReLU())

        # Second conv block, wrapped in a RingBuffer so it can run in
        # streaming mode with causal padding along the time dimension.
        # use_bias=False since BatchNormalization follows the conv.
        # NOTE(review): self.inference_batch_size is read here but never
        # assigned in this __init__ — presumably set by the base class
        # before this point; confirm.
        self.conv2 = ring_buffer.RingBuffer(
            quantize.quantize_layer(
                tf.keras.layers.Conv2D(filters=2,
                                       kernel_size=(3, 1),
                                       dilation_rate=1,
                                       strides=2,
                                       use_bias=False), apply_quantization,
                quantize.NoOpActivationConfig(['kernel'], ['activation'],
                                              False)),
            use_one_step=False,
            inference_batch_size=self.inference_batch_size,
            pad_time_dim='causal')
        # BatchNorm gets a no-op quantize config; presumably because it is
        # folded into the preceding conv at inference time — confirm.
        self.bn2 = quantize.quantize_layer(
            tf.keras.layers.BatchNormalization(),
            default_8bit_quantize_configs.NoOpQuantizeConfig())
        self.relu2 = quantize.quantize_layer(tf.keras.layers.ReLU())

        # Streaming-aware Flatten (one time step per inference call).
        self.flatten = ring_buffer.RingBuffer(
            quantize.quantize_layer(tf.keras.layers.Flatten(),
                                    apply_quantization),
            use_one_step=True,
            inference_batch_size=self.inference_batch_size)

        # Classification head: softmax over label_count classes.
        self.dense = quantize.quantize_layer(
            tf.keras.layers.Dense(label_count,
                                  activation='softmax',
                                  use_bias=False), apply_quantization)
# Example #2
def model(flags):
    """CNN model.

    It is based on the paper:
    Convolutional Neural Networks for Small-footprint Keyword Spotting
    http://www.isca-speech.org/archive/interspeech_2015/papers/i15_1478.pdf
    Model topology is similar to "Hello Edge: Keyword Spotting on
    Microcontrollers" https://arxiv.org/pdf/1711.07128.pdf

    Args:
      flags: data/model parameters

    Returns:
      Keras model for training
    """
    input_shape = modes.get_input_data_shape(flags, modes.Modes.TRAINING)
    input_audio = tf.keras.layers.Input(shape=input_shape,
                                        batch_size=flags.batch_size)
    x = input_audio

    # With 'raw' preprocessing the model is self-contained: feature
    # extraction happens inside the graph, so callers feed raw audio only.
    if flags.preprocess == 'raw':
        x = speech_features.SpeechFeatures(
            speech_features.SpeechFeatures.get_params(flags))(x)

    # Optionally quantize the input with an 8-bit all-values quantizer.
    if flags.quantize:
        x = quantize_layer.QuantizeLayer(
            AllValuesQuantizer(num_bits=8,
                               per_axis=False,
                               symmetric=False,
                               narrow_range=False))(x)

    x = tf.keras.backend.expand_dims(x)

    # One streaming conv block per entry in the parsed flag lists:
    # Conv2D (linear) -> BatchNorm -> Activation.
    conv_params = zip(utils.parse(flags.cnn_filters),
                      utils.parse(flags.cnn_kernel_size),
                      utils.parse(flags.cnn_act),
                      utils.parse(flags.cnn_dilation_rate),
                      utils.parse(flags.cnn_strides))
    for filters, kernel_size, act, dilation, strides in conv_params:
        conv_cell = quantize.quantize_layer(
            tf.keras.layers.Conv2D(filters=filters,
                                   kernel_size=kernel_size,
                                   dilation_rate=dilation,
                                   activation='linear',
                                   strides=strides),
            flags.quantize,
            quantize.NoOpActivationConfig(['kernel'], ['activation'], False))
        x = stream.Stream(cell=conv_cell,
                          pad_time_dim='causal',
                          use_one_step=False)(x)
        # BatchNorm gets a no-op quantize config (quantization is handled
        # around the conv/activation pair instead).
        x = quantize.quantize_layer(
            tf.keras.layers.BatchNormalization(),
            default_8bit_quantize_configs.NoOpQuantizeConfig())(x)
        x = quantize.quantize_layer(tf.keras.layers.Activation(act))(x)

    # Streaming-aware flatten, then dropout before the dense head.
    x = stream.Stream(cell=quantize.quantize_layer(
        tf.keras.layers.Flatten(), apply_quantization=flags.quantize))(x)
    x = tf.keras.layers.Dropout(rate=flags.dropout1)(x)

    # Fully-connected tail, one Dense per (units2, act2) pair.
    for units, act in zip(utils.parse(flags.units2),
                          utils.parse(flags.act2)):
        x = quantize.quantize_layer(
            tf.keras.layers.Dense(units=units, activation=act),
            apply_quantization=flags.quantize)(x)

    # Logits layer; optionally followed by softmax.
    x = quantize.quantize_layer(
        tf.keras.layers.Dense(units=flags.label_count),
        apply_quantization=flags.quantize)(x)
    if flags.return_softmax:
        x = quantize.quantize_layer(
            tf.keras.layers.Activation('softmax'),
            apply_quantization=flags.quantize)(x)
    return tf.keras.Model(input_audio, x)