Ejemplo n.º 1
0
import openl3
import soundfile as sf
import os
import pickle
from multiprocessing import Pool

def save_pkl(name,data):
    """Pickle ``data`` and write it to the file at path ``name``.

    Parameters
    ----------
    name : str
        Path of the file to create; it is opened in binary write mode, so
        any existing file at that path is truncated.
    data : object
        Any picklable object.

    Raises
    ------
    OSError
        If the file cannot be opened for writing.
    pickle.PicklingError
        If ``data`` cannot be pickled.
    """
    # The context manager closes the handle even when pickle.dump raises,
    # which the previous open()/close() pair did not guarantee.
    with open(name, "wb") as f:
        pickle.dump(data, f)

# Directory that is scanned (recursively) for input .wav files.
audio_path = '/data/EEV/data-audio/'
# Directory that receives the embedding files written by openl3.
save_path = '/data/EEV/audio-zzd-feature/'

# Load the embedding model once so it is reused for every file.
model = openl3.models.load_audio_embedding_model(input_repr="linear", content_type="env",
                                                 embedding_size=512)

feature_dict = {} 
count = 0 
# Must be a list: the paths are collected with .append() and handed to
# openl3 as a list of files (a dict has no .append()).
audio_filepath_list = []
for root, dirs, files in os.walk(audio_path, topdown=True):
    for name in files:
        # Keep only files whose name ends in "wav".
        if not name.endswith('wav'):
            continue
        # Join with `root` (the directory currently being walked) rather than
        # the top-level audio_path, so files in subdirectories resolve to
        # paths that actually exist.
        src_path = os.path.join(root, name)
        audio_filepath_list.append(src_path)

# Compute embeddings for all collected files in one call.
openl3.process_audio_file(audio_filepath_list, batch_size=32, model=model, hop_size=1/6, output_dir=save_path)
Ejemplo n.º 2
0
def test_process_audio_file():
    """Exercise ``openl3.process_audio_file`` end to end.

    Covers, in order: a single file with an explicit output directory, a
    filename suffix, saving next to the input file, rejection of an invalid
    audio file, the ``overwrite`` flag, loading the model inside the function,
    processing a list of files, and failure on unreadable/``None`` inputs.
    Temporary directories are always removed in ``finally`` blocks.
    """
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)

    # Load a model and pass it in
    model = openl3.models.load_audio_embedding_model("mel256", "music", 512)

    # Make a copy of the file so we can test the case where we save to the same directory
    input_path_alt = os.path.join(test_subdir, "chirp_mono.wav")
    shutil.copy(CHIRP_MONO_PATH, test_subdir)

    # A text file with a .wav extension, used to trigger the error path
    invalid_file_path = os.path.join(test_subdir, "invalid.wav")
    with open(invalid_file_path, 'w') as f:
        f.write('This is not an audio file.')

    exp_output_path1 = os.path.join(test_output_dir, "chirp_mono.npz")
    exp_output_path2 = os.path.join(test_output_dir, "chirp_mono_suffix.npz")
    exp_output_path3 = os.path.join(test_subdir, "chirp_mono.npz")
    try:
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model)
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  suffix='suffix', model=model)
        openl3.process_audio_file(input_path_alt, model=model)

        # Make sure we fail when invalid files are provided
        pytest.raises(OpenL3Error, openl3.process_audio_file,
                      invalid_file_path, model=model)

        # Make sure paths all exist
        assert os.path.exists(exp_output_path1)
        assert os.path.exists(exp_output_path2)
        assert os.path.exists(exp_output_path3)

        data = np.load(exp_output_path1)
        assert 'embedding' in data
        assert 'timestamps' in data

        embedding = data['embedding']
        timestamps = data['timestamps']

        # Quick sanity check on data
        assert embedding.ndim == 2
        assert timestamps.ndim == 1

        # Test overwriting
        test_str = "this is a test file"
        with open(exp_output_path1, 'w') as f:
            f.write(test_str)
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model, overwrite=False)
        with open(exp_output_path1, 'r') as f:
            output_content = f.read()
        # File should not be overwritten
        assert output_content == test_str

        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model, overwrite=True)
        with open(exp_output_path1, 'rb') as f:
            output_content = f.read()
        # File should be overwritten.
        # The file is read in binary mode, so compare against the encoded
        # sentinel: bytes never equal str in Python 3, which would make a
        # bytes-vs-str inequality pass even if nothing was overwritten.
        assert output_content != test_str.encode()
        K.clear_session()

        # Test loading model in function
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  input_repr="mel256",
                                  content_type="music", embedding_size=512)
        K.clear_session()

    finally:
        shutil.rmtree(test_output_dir)

    # Load a model and pass it in
    model = openl3.models.load_audio_embedding_model("mel256", "music", 512)

    ## Test providing multiple files
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)
    path1 = os.path.join(test_subdir, "chirp_1.wav")
    path2 = os.path.join(test_subdir, "chirp_2.wav")
    shutil.copy(CHIRP_MONO_PATH, path1)
    shutil.copy(CHIRP_MONO_PATH, path2)

    try:
        openl3.process_audio_file([path1, path2], output_dir=test_output_dir,
                                  batch_size=4, model=model)
        exp_output_path1 = os.path.join(test_output_dir, "chirp_1.npz")
        exp_output_path2 = os.path.join(test_output_dir, "chirp_2.npz")
        assert os.path.exists(exp_output_path1)
        assert os.path.exists(exp_output_path2)

        data1 = np.load(exp_output_path1)
        data2 = np.load(exp_output_path2)
        assert 'embedding' in data1
        assert 'timestamps' in data1
        assert 'embedding' in data2
        assert 'timestamps' in data2

        embedding1 = data1['embedding']
        timestamps1 = data1['timestamps']
        embedding2 = data2['embedding']
        timestamps2 = data2['timestamps']

        # Quick sanity check on data
        assert embedding1.ndim == 2
        assert timestamps1.ndim == 1
        assert embedding2.ndim == 2
        assert timestamps2.ndim == 1

    finally:
        shutil.rmtree(test_output_dir)

    # Make sure we fail when file cannot be opened
    pytest.raises(OpenL3Error, openl3.process_audio_file,
                  '/fake/directory/asdf.wav', model=model)
    pytest.raises(OpenL3Error, openl3.process_audio_file, None, model=model)
    K.clear_session()
Ejemplo n.º 3
0
Archivo: cli.py Proyecto: Bomme/openl3
def run(modality,
        inputs,
        output_dir=None,
        suffix=None,
        input_repr="mel256",
        content_type="music",
        audio_embedding_size=6144,
        audio_center=True,
        audio_hop_size=0.1,
        audio_batch_size=32,
        image_embedding_size=8192,
        image_batch_size=32,
        overwrite=False,
        verbose=False):
    """
    Compute and save L3 embeddings for the given inputs.

    Resolves ``inputs`` to a list of files, loads the embedding model(s)
    required by ``modality`` and forwards everything to the matching
    ``process_*_file`` function.

    Parameters
    ----------
    modality : str
        Which modality to process: 'audio', 'image', or 'video'.
    inputs : list of str, or str
        A single path, or an iterable of file/directory paths. A single
        string is used as-is; any other iterable is expanded with
        ``get_file_list``.
    output_dir : str or None
        Directory for the output files, forwarded to the processing
        function. If None, output files are saved to the directory
        containing the input file.
    suffix : str or None
        String appended to the output filename, i.e.
        <base filename>_<suffix>. If None, no suffix is added.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for the model.
    content_type : "music" or "env"
        Type of content used to train the embedding.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : boolean
        If True, pads beginning of signal so timestamps correspond
        to center of window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to the audio embedding model.
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to the image embedding model.
    overwrite : bool
        If True, overwrites existing output files.
    verbose : boolean
        If True, print verbose messages.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if
        ``modality`` is not one of the supported values.
    SystemExit
        Raised via ``sys.exit(-1)`` when no files are found.
    """

    # Resolve the inputs into a flat list of files to process.
    if isinstance(inputs, str):
        file_list = [inputs]
    else:
        if not isinstance(inputs, Iterable):
            raise OpenL3Error('Invalid input: {}'.format(str(inputs)))
        file_list = get_file_list(inputs)

    if len(file_list) == 0:
        print('openl3: No files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Keyword arguments that every processing function receives unchanged.
    shared_kwargs = dict(output_dir=output_dir,
                         suffix=suffix,
                         overwrite=overwrite,
                         verbose=verbose)

    if modality == 'audio':
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        process_audio_file(file_list,
                           model=audio_model,
                           center=audio_center,
                           hop_size=audio_hop_size,
                           batch_size=audio_batch_size,
                           **shared_kwargs)
    elif modality == 'image':
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_image_file(file_list,
                           model=image_model,
                           batch_size=image_batch_size,
                           **shared_kwargs)
    elif modality == 'video':
        # Video needs both models; the audio model is loaded first.
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_video_file(file_list,
                           audio_model=audio_model,
                           image_model=image_model,
                           audio_embedding_size=audio_embedding_size,
                           audio_center=audio_center,
                           audio_hop_size=audio_hop_size,
                           audio_batch_size=audio_batch_size,
                           image_batch_size=image_batch_size,
                           image_embedding_size=image_embedding_size,
                           **shared_kwargs)
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
Ejemplo n.º 4
0
                                         embedding_size=512)
    return emb


if __name__ == "__main__":

    # (input audio directory, embedding output directory) for each split,
    # processed in this order: development first, then evaluation.
    splits = (
        (Path('./data/clotho_audio_files/development'),
         Path('./data/openl3/development')),
        (Path('./data/clotho_audio_files/evaluation'),
         Path('./data/openl3/evaluation')),
    )

    # One model instance is loaded up front and reused for both splits.
    model = openl3.models.load_audio_embedding_model(input_repr="mel128",
                                                     content_type="env",
                                                     embedding_size=512)

    for audio_dir, embedding_dir in splits:
        # Every directory entry is passed on, converted to a plain string.
        file_paths = [str(entry) for entry in audio_dir.iterdir()]
        openl3.process_audio_file(file_paths,
                                  model=model,
                                  output_dir=embedding_dir,
                                  batch_size=16)