Example #1
def get_keras_model(bottleneck_dimension,
                    output_dimension,
                    alpha=1.0,
                    mobilenet_size='small',
                    frontend=True,
                    avg_pool=False,
                    compressor=None,
                    qat=False,
                    tflite=False):
    """Make a Keras student model."""
    def _map_fn_lambda(x):
        return tf.map_fn(_sample_to_features, x, dtype=tf.float64)

    def _map_mobilenet_func(mnet_size):
        mnet_size_map = {
            'tiny': mobilenetv3_tiny,
            'small': tf.keras.applications.MobileNetV3Small,
            'large': tf.keras.applications.MobileNetV3Large,
        }
        if mnet_size.lower() not in mnet_size_map:
            raise ValueError('Unknown MobileNet size %s.' % mnet_size)
        return mnet_size_map[mnet_size.lower()]

    # TFLite use-cases usually use non-batched inference, and this also enables
    # hardware acceleration.
    num_batches = 1 if tflite else None
    if frontend:
        model_in = tf.keras.Input((None, ), name='audio_samples')
        feats = tf.keras.layers.Lambda(_map_fn_lambda)(model_in)
        feats.shape.assert_is_compatible_with([None, None, 96, 64])
        feats = tf.transpose(feats, [0, 2, 1, 3])
        feats = tf.reshape(feats, [-1, 96, 64, 1])
    else:
        model_in = tf.keras.Input((96, 64, 1),
                                  name='log_mel_spectrogram',
                                  batch_size=num_batches)
        feats = model_in
    model = _map_mobilenet_func(mobilenet_size)(
        input_shape=[96, 64, 1],
        alpha=alpha,
        minimalistic=False,
        include_top=False,
        weights=None,
        pooling='avg' if avg_pool else None,
        dropout_rate=0.0)
    model_out = model(feats)
    if avg_pool:
        model_out.shape.assert_is_compatible_with([None, None])
    else:
        model_out.shape.assert_is_compatible_with([None, 3, 2, None])
    if bottleneck_dimension:
        if compressor is not None:
            bottleneck = CompressedDense(bottleneck_dimension,
                                         compression_obj=compressor,
                                         name='distilled_output')
        else:
            bottleneck = tf.keras.layers.Dense(bottleneck_dimension,
                                               name='distilled_output')
            if qat:
                bottleneck = tfmot.quantization.keras.quantize_annotate_layer(
                    bottleneck)
        embeddings = tf.keras.layers.Flatten()(model_out)
        embeddings = bottleneck(embeddings)

        if tflite:
            # We generate TFLite models just for the embeddings.
            output_model = tf.keras.Model(inputs=model_in, outputs=embeddings)
            if compressor is not None:
                # If model employs compression, this ensures that the TFLite model
                # just uses the smaller matrices for inference.
                output_model.get_layer('distilled_output').kernel = None
                output_model.get_layer(
                    'distilled_output').compression_op.a_matrix_tfvar = None
            return output_model
    else:
        embeddings = tf.keras.layers.Flatten(
            name='distilled_output')(model_out)
    output = tf.keras.layers.Dense(output_dimension,
                                   name='embedding_to_target')(embeddings)
    output_model = tf.keras.Model(inputs=model_in, outputs=output)
    return output_model
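
A minimal usage sketch for Example #1, assuming `get_keras_model` and the helpers it references (`_sample_to_features`, `mobilenetv3_tiny`, `CompressedDense`) are in scope, along with `import tensorflow as tf` and `import tensorflow_model_optimization as tfmot`. Passing `frontend=False` sidesteps the audio frontend, so the model consumes precomputed [96, 64, 1] log-mel patches directly:

import tensorflow as tf

# Student that maps log-mel patches to a 1024-d target through a 100-d bottleneck.
student = get_keras_model(
    bottleneck_dimension=100,
    output_dimension=1024,
    alpha=1.0,
    mobilenet_size='small',
    frontend=False,
    avg_pool=True)

log_mel = tf.random.uniform([8, 96, 64, 1])  # fake batch of spectrogram patches
predictions = student(log_mel)               # output of 'embedding_to_target'
assert predictions.shape == (8, 1024)

# The 100-d distilled embedding is available from the intermediate layer.
embedding_model = tf.keras.Model(
    inputs=student.inputs,
    outputs=student.get_layer('distilled_output').output)
assert embedding_model(log_mel).shape == (8, 100)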
Example #2
def get_keras_model(bottleneck_dimension,
                    output_dimension,
                    alpha=1.0,
                    mobilenet_size='small',
                    frontend=True,
                    avg_pool=False,
                    compressor=None,
                    qat=False):
    """Make a Keras student model."""
    def _map_fn_lambda(x):
        return tf.map_fn(_sample_to_features, x, dtype=tf.float64)

    def _map_mobilenet_func(mnet_size):
        mnet_size_map = {
            'tiny': mobilenetv3_tiny,
            'small': tf.keras.applications.MobileNetV3Small,
            'large': tf.keras.applications.MobileNetV3Large,
        }
        if mnet_size.lower() not in mnet_size_map:
            raise ValueError('Unknown MobileNet size %s.' % mnet_size)
        return mnet_size_map[mnet_size.lower()]

    if frontend:
        model_in = tf.keras.Input((None, ), name='audio_samples')
        feats = tf.keras.layers.Lambda(_map_fn_lambda)(model_in)
        feats.shape.assert_is_compatible_with([None, None, 96, 64])
        feats = tf.transpose(feats, [0, 2, 1, 3])
        feats = tf.reshape(feats, [-1, 96, 64, 1])
    else:
        model_in = tf.keras.Input((96, 64, 1), name='log_mel_spectrogram')
        feats = model_in
    model = _map_mobilenet_func(mobilenet_size)(
        input_shape=[96, 64, 1],
        alpha=alpha,
        minimalistic=False,
        include_top=False,
        weights=None,
        pooling='avg' if avg_pool else None,
        dropout_rate=0.0)
    model_out = model(feats)
    if avg_pool:
        model_out.shape.assert_is_compatible_with([None, None])
    else:
        model_out.shape.assert_is_compatible_with([None, 3, 2, None])
    if bottleneck_dimension:
        if compressor is not None:
            bottleneck = CompressedDense(bottleneck_dimension,
                                         compression_obj=compressor,
                                         name='distilled_output')
        else:
            bottleneck = tf.keras.layers.Dense(bottleneck_dimension,
                                               name='distilled_output')
            if qat:
                bottleneck = tfmot.quantization.keras.quantize_annotate_layer(
                    bottleneck)
        embeddings = tf.keras.layers.Flatten()(model_out)
        embeddings = bottleneck(embeddings)
    else:
        embeddings = tf.keras.layers.Flatten(
            name='distilled_output')(model_out)
    output = tf.keras.layers.Dense(output_dimension,
                                   name='embedding_to_target')(embeddings)
    output_model = tf.keras.Model(inputs=model_in, outputs=output)
    return output_model
Example #3
def get_keras_model(bottleneck_dimension,
                    output_dimension,
                    alpha=1.0,
                    mobilenet_size='small',
                    frontend=True,
                    avg_pool=False,
                    compressor=None,
                    quantize_aware_training=False,
                    tflite=False):
    """Make a Keras student model."""
    # For debugging, log hyperparameter values.
    logging.info('bottleneck_dimension: %i', bottleneck_dimension)
    logging.info('output_dimension: %i', output_dimension)
    logging.info('alpha: %s', alpha)
    logging.info('frontend: %s', frontend)
    logging.info('avg_pool: %s', avg_pool)
    logging.info('compressor: %s', compressor)
    logging.info('quantize_aware_training: %s', quantize_aware_training)
    logging.info('tflite: %s', tflite)

    output_dict = {}  # Dictionary of model outputs.

    def _map_mobilenet_func(mnet_size):
        mnet_size_map = {
            'tiny': mobilenetv3_tiny,
            'small': tf.keras.applications.MobileNetV3Small,
            'large': tf.keras.applications.MobileNetV3Large,
        }
        if mnet_size.lower() not in mnet_size_map:
            raise ValueError('Unknown MobileNet size %s.' % mnet_size)
        return mnet_size_map[mnet_size.lower()]

    # TFLite use-cases usually use non-batched inference, and this also enables
    # hardware acceleration.
    num_batches = 1 if tflite else None
    if frontend:
        frontend_args = tf_frontend.frontend_args_from_flags()
        logging.info('frontend_args: %s', frontend_args)
        model_in = tf.keras.Input((None, ),
                                  name='audio_samples',
                                  batch_size=num_batches)
        frontend_fn = _get_feats_map_fn(tflite, frontend_args)
        feats = tf.keras.layers.Lambda(frontend_fn)(model_in)
        feats = tf.reshape(feats, [-1, 96, 64, 1])
    else:
        model_in = tf.keras.Input((96, 64, 1), name='log_mel_spectrogram')
        feats = model_in
    inputs = [model_in]

    model = _map_mobilenet_func(mobilenet_size)(
        input_shape=[96, 64, 1],
        alpha=alpha,
        minimalistic=False,
        include_top=False,
        weights=None,
        pooling='avg' if avg_pool else None,
        dropout_rate=0.0)
    model_out = model(feats)
    if avg_pool:
        model_out.shape.assert_is_compatible_with([None, None])
    else:
        model_out.shape.assert_is_compatible_with([None, 1, 1, None])
    if bottleneck_dimension:
        if compressor is not None:
            bottleneck = CompressedDense(bottleneck_dimension,
                                         compression_obj=compressor,
                                         name='distilled_output')
        else:
            bottleneck = tf.keras.layers.Dense(bottleneck_dimension,
                                               name='distilled_output')
            if quantize_aware_training:
                bottleneck = tfmot.quantization.keras.quantize_annotate_layer(
                    bottleneck)
        embeddings = tf.keras.layers.Flatten()(model_out)
        embeddings = bottleneck(embeddings)
    else:
        embeddings = tf.keras.layers.Flatten(
            name='distilled_output')(model_out)

    # Construct optional final layer, and create output dictionary.
    output_dict['embedding'] = embeddings
    if output_dimension:
        output = tf.keras.layers.Dense(output_dimension,
                                       name='embedding_to_target')(embeddings)
        output_dict['embedding_to_target'] = output
    output_model = tf.keras.Model(inputs=inputs, outputs=output_dict)

    # Optional modifications to the model for TFLite.
    if tflite:
        if compressor is not None:
            # If model employs compression, this ensures that the TFLite model
            # just uses the smaller matrices for inference.
            output_model.get_layer('distilled_output').kernel = None
            output_model.get_layer(
                'distilled_output').compression_op.a_matrix_tfvar = None

    return output_model
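
A brief sketch of consuming Example #3's dictionary outputs, assuming `get_keras_model`, its helpers, and the module-level `logging` import are in scope; the `frontend=False` path keeps the sketch independent of the frontend flags. The model returns the distilled embedding and, because `output_dimension` is set, the regression head as well:

import tensorflow as tf

student = get_keras_model(
    bottleneck_dimension=100,
    output_dimension=1024,
    mobilenet_size='small',
    frontend=False,
    avg_pool=True)

log_mel = tf.random.uniform([4, 96, 64, 1])
outputs = student(log_mel)
print(outputs['embedding'].shape)            # (4, 100)
print(outputs['embedding_to_target'].shape)  # (4, 1024)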
Example #4
def get_keras_model(model_type,
                    bottleneck_dimension,
                    output_dimension,
                    frontend=True,
                    compressor=None,
                    quantize_aware_training=False,
                    tflite=False):
    """Make a Keras student model."""
    # For debugging, log hyperparameter values.
    logging.info('model name: %s', model_type)
    logging.info('bottleneck_dimension: %i', bottleneck_dimension)
    logging.info('output_dimension: %i', output_dimension)
    logging.info('frontend: %s', frontend)
    logging.info('compressor: %s', compressor)
    logging.info('quantize_aware_training: %s', quantize_aware_training)
    logging.info('tflite: %s', tflite)

    output_dict = {}  # Dictionary of model outputs.

    # TFLite use-cases usually use non-batched inference, and this also enables
    # hardware acceleration.
    num_batches = 1 if tflite else None
    frontend_args = frontend_lib.frontend_args_from_flags()
    feats_inner_dim = frontend_lib.get_frontend_output_shape()[0]
    if frontend:
        logging.info('frontend_args: %s', frontend_args)
        model_in = tf.keras.Input((None, ),
                                  name='audio_samples',
                                  batch_size=num_batches)
        frontend_fn = frontend_lib.get_feats_map_fn(tflite, frontend_args)
        feats = tf.keras.layers.Lambda(frontend_fn)(model_in)
        feats.shape.assert_is_compatible_with([
            num_batches, feats_inner_dim, frontend_args['frame_width'],
            frontend_args['num_mel_bins']
        ])
        feats = tf.reshape(feats, [
            -1, feats_inner_dim * frontend_args['frame_width'],
            frontend_args['num_mel_bins'], 1
        ])
    else:
        model_in = tf.keras.Input(
            (feats_inner_dim * frontend_args['frame_width'],
             frontend_args['num_mel_bins'], 1),
            batch_size=num_batches,
            name='log_mel_spectrogram')
        feats = model_in
    inputs = [model_in]

    # Build network.
    if model_type.startswith('mobilenet_'):
        # Format is "mobilenet_{size}_{alpha}_{avg_pool}"
        _, mobilenet_size, alpha, avg_pool = model_type.split('_')
        alpha = float(alpha)
        # `bool('False')` is truthy, so parse the avg_pool flag explicitly.
        avg_pool = avg_pool.lower() == 'true'
        logging.info('mobilenet_size: %s', mobilenet_size)
        logging.info('alpha: %f', alpha)
        logging.info('avg_pool: %s', avg_pool)
        model = _map_mobilenet_func(mobilenet_size)(
            input_shape=(feats_inner_dim * frontend_args['frame_width'],
                         frontend_args['num_mel_bins'], 1),
            alpha=alpha,
            minimalistic=False,
            include_top=False,
            weights=None,
            pooling='avg' if avg_pool else None,
            dropout_rate=0.0)
        expected_output_shape = ([None, None] if avg_pool
                                 else [None, 1, 1, None])
    elif model_type.startswith('efficientnet'):
        model_fn, final_dim = {
            'efficientnetb0': (tf.keras.applications.EfficientNetB0, 1280),
            'efficientnetb1': (tf.keras.applications.EfficientNetB1, 1280),
            'efficientnetb2': (tf.keras.applications.EfficientNetB2, 1408),
            'efficientnetb3': (tf.keras.applications.EfficientNetB3, 1536),
        }[model_type]
        model = model_fn(
            include_top=False,
            weights=None,  # could be pretrained from imagenet.
            input_shape=(feats_inner_dim * frontend_args['frame_width'],
                         frontend_args['num_mel_bins'], 1),
            pooling='avg',
        )
        expected_output_shape = [None, final_dim]
    else:
        raise ValueError(f'`model_type` not recognized: {model_type}')

    # TODO(joelshor): Consider checking that there are trainable weights in
    # `model`.
    model_out = model(feats)
    model_out.shape.assert_is_compatible_with(expected_output_shape)

    if bottleneck_dimension:
        if compressor is not None:
            bottleneck = CompressedDense(bottleneck_dimension,
                                         compression_obj=compressor,
                                         name='distilled_output')
        else:
            bottleneck = tf.keras.layers.Dense(bottleneck_dimension,
                                               name='distilled_output')
            if quantize_aware_training:
                bottleneck = tfmot.quantization.keras.quantize_annotate_layer(
                    bottleneck)
        embeddings = tf.keras.layers.Flatten()(model_out)
        embeddings = bottleneck(embeddings)
    else:
        embeddings = tf.keras.layers.Flatten(
            name='distilled_output')(model_out)

    # Construct optional final layer, and create output dictionary.
    output_dict['embedding'] = embeddings
    if output_dimension:
        output = tf.keras.layers.Dense(output_dimension,
                                       name='embedding_to_target')(embeddings)
        output_dict['embedding_to_target'] = output
    output_model = tf.keras.Model(inputs=inputs, outputs=output_dict)
    # Optional modifications to the model for TFLite.
    if tflite:
        if compressor is not None:
            # If model employs compression, this ensures that the TFLite model
            # just uses the smaller matrices for inference.
            output_model.get_layer('distilled_output').kernel = None
            output_model.get_layer(
                'distilled_output').compression_op.a_matrix_tfvar = None

    return output_model
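
A sketch of Example #4's `model_type` strings, assuming `get_keras_model`, `_map_mobilenet_func`, `CompressedDense`, and `frontend_lib` are importable and the frontend flags have been parsed (both branches read `frontend_lib.frontend_args_from_flags()` before building the network). MobileNet backbones are encoded as 'mobilenet_{size}_{alpha}_{avg_pool}'; EfficientNets use their plain names:

# MobileNetV3-Small, width multiplier 1.0, with global average pooling.
mobilenet_student = get_keras_model(
    model_type='mobilenet_small_1.0_True',
    bottleneck_dimension=100,
    output_dimension=1024,
    frontend=False)

# EfficientNet-B0 backbone; output_dimension=0 keeps only the 'embedding' output.
efficientnet_student = get_keras_model(
    model_type='efficientnetb0',
    bottleneck_dimension=100,
    output_dimension=0,
    frontend=False)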