Example #1
0
def load_audio_embedding_model_from_path(model_path,
                                         input_repr,
                                         embedding_size,
                                         frontend='kapre'):
    """
    Loads an audio embedding model with weights from the given file path.

    Note that the model architecture is NOT inferred from the filename:
    it is selected entirely by `input_repr`, so the weights file must
    correspond to the architecture implied by `input_repr`, or loading
    will fail (or silently produce a mismatched model).

    Parameters
    ----------
    model_path : str
        Path to an HDF5 (.h5) file containing model weights compatible
        with the architecture selected by `input_repr`.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    embedding_size : 6144 or 512
        Embedding dimensionality.
    frontend : "kapre" or "librosa"
        The audio frontend to use. If frontend == 'kapre', then the kapre frontend will
        be included. Otherwise no frontend will be added inside the keras model.

    Returns
    -------
    model : tf.keras.Model
        Model object.
    """
    # May normalize/translate both values (e.g. frontend-specific input repr).
    frontend, input_repr = _validate_audio_frontend(frontend, input_repr)

    # Construct embedding model and load model weights. Keras emits
    # deprecation/layer warnings during construction that are not
    # actionable here, so they are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        m = AUDIO_MODELS[input_repr](include_frontend=frontend == 'kapre')

    m.load_weights(model_path)

    # Pooling for final output embedding size: the pool size is looked up
    # per (input_repr, embedding_size) so the flattened output matches
    # the requested dimensionality.
    pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size]
    y_a = MaxPooling2D(pool_size=pool_size, padding='same')(m.output)
    y_a = Flatten()(y_a)
    m = Model(inputs=m.input, outputs=y_a)
    # Record the chosen frontend on the model so downstream code can tell
    # whether preprocessing happens inside or outside the graph.
    m.frontend = frontend
    return m
Example #2
0
def load_audio_embedding_model(input_repr,
                               content_type,
                               embedding_size,
                               frontend='kapre'):
    """
    Constructs and returns an audio embedding model with the given
    characteristics, loading its bundled pretrained weights.

    A new model object is built on every call; no caching is performed.

    Parameters
    ----------
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    embedding_size : 6144 or 512
        Embedding dimensionality.
    frontend : "kapre" or "librosa"
        The audio frontend to use. If frontend == 'kapre', then the kapre frontend will
        be included. Otherwise no frontend will be added inside the keras model.

    Returns
    -------
    model : tf.keras.Model
        Model object.
    """
    # May normalize/translate both values (e.g. frontend-specific input repr);
    # the normalized input_repr is also used to resolve the weights path below.
    frontend, input_repr = _validate_audio_frontend(frontend, input_repr)

    # Construct embedding model and load model weights. Keras emits
    # deprecation/layer warnings during construction that are not
    # actionable here, so they are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        m = AUDIO_MODELS[input_repr](include_frontend=frontend == 'kapre')

    m.load_weights(get_audio_embedding_model_path(input_repr, content_type))

    # Pooling for final output embedding size: the pool size is looked up
    # per (input_repr, embedding_size) so the flattened output matches
    # the requested dimensionality.
    pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size]
    y_a = MaxPooling2D(pool_size=pool_size, padding='same')(m.output)
    y_a = Flatten()(y_a)
    m = Model(inputs=m.input, outputs=y_a)
    # Record the chosen frontend on the model so downstream code can tell
    # whether preprocessing happens inside or outside the graph.
    m.frontend = frontend
    return m