Exemple #1
0
def run(modality,
        inputs,
        output_dir=None,
        suffix=None,
        input_repr="mel256",
        content_type="music",
        audio_embedding_size=6144,
        audio_center=True,
        audio_hop_size=0.1,
        audio_batch_size=32,
        image_embedding_size=8192,
        image_batch_size=32,
        overwrite=False,
        verbose=False):
    """
    Load the embedding model(s) for a modality and process the given inputs.

    Parameters
    ----------
    modality : str
        One of 'audio', 'image' or 'video'; selects which model(s) are loaded
        and which processing function is called.
    inputs : str, or iterable of str
        A single path, or an iterable of paths. A single string is used as-is
        as the only entry of the file list; any other iterable is handed to
        ``get_file_list`` to build the file list.
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename, i.e.
        <base filename>_<suffix>.npy. If None, then no suffix will be added,
        i.e. <base filename>.npy.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for model. Passed to both the audio
        and the image model loaders.
    content_type : "music" or "env"
        Type of content used to train embedding. Passed to both the audio
        and the image model loaders.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : boolean
        If True, pads beginning of signal so timestamps correspond
        to center of window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to audio embedding model
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to image embedding model
    overwrite : bool
        If True, overwrites existing output files
    verbose : boolean
        If True, print verbose messages.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if ``modality``
        is not one of the supported values.
    SystemExit
        Raised through ``sys.exit(-1)`` when the resolved file list is empty.
    """

    # A str is itself iterable, so one tuple check rejects everything that
    # is neither a single path nor a collection of paths.
    if not isinstance(inputs, (str, Iterable)):
        raise OpenL3Error('Invalid input: {}'.format(str(inputs)))

    # A lone string bypasses get_file_list and becomes the only entry.
    file_list = [inputs] if isinstance(inputs, str) else get_file_list(inputs)

    if not len(file_list):
        print('openl3: No files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Keyword arguments that every processing function receives.
    shared_kwargs = dict(output_dir=output_dir,
                         suffix=suffix,
                         overwrite=overwrite,
                         verbose=verbose)

    if modality == 'audio':
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        process_audio_file(file_list,
                           model=audio_model,
                           center=audio_center,
                           hop_size=audio_hop_size,
                           batch_size=audio_batch_size,
                           **shared_kwargs)
    elif modality == 'image':
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_image_file(file_list,
                           model=image_model,
                           batch_size=image_batch_size,
                           **shared_kwargs)
    elif modality == 'video':
        # Video needs both models; the audio model is loaded first.
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_video_file(file_list,
                           audio_model=audio_model,
                           image_model=image_model,
                           audio_embedding_size=audio_embedding_size,
                           audio_center=audio_center,
                           audio_hop_size=audio_hop_size,
                           audio_batch_size=audio_batch_size,
                           image_batch_size=image_batch_size,
                           image_embedding_size=image_embedding_size,
                           **shared_kwargs)
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
Exemple #2
0
def test_get_image_embedding_model(input_repr, content_type, embedding_size,
                                   ref_image_model):
    """Load an image embedding model and check it against a reference model.

    The model for the given configuration is loaded and handed to
    ``_compare_models`` together with ``ref_image_model``, the entry of
    ``IMAGE_INPUT_REPR_SIZES`` for ``input_repr``, and ``embedding_size``.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization/fixtures declared outside this view - confirm.
    """
    # Load first, then look up the size, matching the original evaluation order.
    loaded_model = load_image_embedding_model(input_repr, content_type,
                                              embedding_size)
    repr_size = IMAGE_INPUT_REPR_SIZES[input_repr]
    _compare_models(loaded_model, ref_image_model, repr_size, embedding_size)
Exemple #3
0
def ref_image_model():
    """Return the image embedding model for ('linear', 'music', 8192).

    The second entry of the model's output shape is asserted to equal the
    requested embedding size before the model is returned.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    that is not visible here - confirm.
    """
    expected_size = 8192
    model = load_image_embedding_model('linear', 'music', expected_size)
    assert model.output_shape[1] == expected_size
    return model
Exemple #4
0
def test_load_image_embedding_model():
    """Check every image embedding model configuration loads consistently.

    All combinations of input representation ('linear', 'mel128', 'mel256'),
    content type ('music', 'env') and embedding size (8192, 512) are loaded
    in that nesting order. For each model the second entry of
    ``output_shape`` must equal the requested embedding size. The first model
    loaded ('linear', 'music', 8192) serves as the reference: every later
    model must have the same number of layers, and each of its layers must be
    an instance of the type of the reference layer at the same position.
    """
    input_reprs = ('linear', 'mel128', 'mel256')
    content_types = ('music', 'env')
    embedding_sizes = (8192, 512)

    first_model = None
    for input_repr in input_reprs:
        for content_type in content_types:
            for embedding_size in embedding_sizes:
                # Included in every assertion message so a failure names
                # the configuration that broke.
                config = (input_repr, content_type, embedding_size)

                m = load_image_embedding_model(input_repr, content_type,
                                               embedding_size)
                assert m.output_shape[1] == embedding_size, \
                    'unexpected embedding size for {}'.format(config)

                if first_model is None:
                    # The very first model is the structural reference and
                    # is not compared against itself.
                    first_model = m
                    continue

                assert len(m.layers) == len(first_model.layers), \
                    'layer count differs from reference for {}'.format(config)
                assert all(
                    isinstance(layer, type(ref_layer))
                    for (layer, ref_layer) in zip(m.layers,
                                                  first_model.layers)
                ), 'layer types differ from reference for {}'.format(config)