import os
import pickle
from multiprocessing import Pool

import openl3
import soundfile as sf


def save_pkl(name, data):
    """Pickle ``data`` into the file at path ``name``.

    Parameters
    ----------
    name : str
        Destination path; the file is opened in binary write mode.
    data : object
        Any picklable object.
    """
    # The context manager closes the handle even if pickle.dump raises.
    with open(name, "wb") as f:
        pickle.dump(data, f)


audio_path = '/data/EEV/data-audio/'
save_path = '/data/EEV/audio-zzd-feature/'

# Load the embedding model once so it is shared by every processed file.
model = openl3.models.load_audio_embedding_model(
    input_repr="linear", content_type="env", embedding_size=512)

# Not used by the statements visible here; kept in case later parts of the
# script rely on these names.
feature_dict = {}
count = 0

# Collect every file whose name ends in "wav" below audio_path.
# This must be a list: the original initialised a dict, which has no
# ``append`` and failed on the first match.
audio_filepath_list = []
for root, dirs, files in os.walk(audio_path, topdown=True):
    for name in files:
        if not name.endswith('wav'):
            continue
        # Join with ``root`` (not ``audio_path``) so files found in nested
        # directories resolve to their real location.
        audio_filepath_list.append(os.path.join(root, name))

openl3.process_audio_file(audio_filepath_list, batch_size=32, model=model,
                          hop_size=1/6, output_dir=save_path)
def test_process_audio_file():
    """Exercise ``openl3.process_audio_file`` on single files and file lists.

    Scenarios covered, in order:

    * a single file written to an explicit output directory, with and
      without a filename suffix, and next to the input file;
    * an invalid (non-audio) file, which must raise ``OpenL3Error``;
    * the ``overwrite`` flag, both ``False`` and ``True``;
    * letting the function load the model itself;
    * a list of input files processed in one call;
    * a non-existent path and ``None`` as input, which must raise
      ``OpenL3Error``.
    """
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)

    # Load a model and pass it in
    model = openl3.models.load_audio_embedding_model("mel256", "music", 512)

    # Make a copy of the file so we can test the case where we save to the
    # same directory
    input_path_alt = os.path.join(test_subdir, "chirp_mono.wav")
    shutil.copy(CHIRP_MONO_PATH, test_subdir)

    invalid_file_path = os.path.join(test_subdir, "invalid.wav")
    with open(invalid_file_path, 'w') as f:
        f.write('This is not an audio file.')

    exp_output_path1 = os.path.join(test_output_dir, "chirp_mono.npz")
    exp_output_path2 = os.path.join(test_output_dir, "chirp_mono_suffix.npz")
    exp_output_path3 = os.path.join(test_subdir, "chirp_mono.npz")
    try:
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model)
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  suffix='suffix', model=model)
        openl3.process_audio_file(input_path_alt, model=model)

        # Make sure we fail when invalid files are provided
        pytest.raises(OpenL3Error, openl3.process_audio_file,
                      invalid_file_path, model=model)

        # Make sure paths all exist
        assert os.path.exists(exp_output_path1)
        assert os.path.exists(exp_output_path2)
        assert os.path.exists(exp_output_path3)

        # Close the archive before it is overwritten/removed below.
        with np.load(exp_output_path1) as data:
            assert 'embedding' in data
            assert 'timestamps' in data

            embedding = data['embedding']
            timestamps = data['timestamps']

        # Quick sanity check on data
        assert embedding.ndim == 2
        assert timestamps.ndim == 1

        # Test overwriting
        test_str = "this is a test file"
        with open(exp_output_path1, 'w') as f:
            f.write(test_str)

        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model, overwrite=False)
        with open(exp_output_path1, 'r') as f:
            output_content = f.read()
        # File should not be overwritten
        assert output_content == test_str

        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  model=model, overwrite=True)
        with open(exp_output_path1, 'rb') as f:
            output_content = f.read()
        # File should be overwritten. The content is read as bytes, so it
        # has to be compared against bytes: bytes != str is always True and
        # would make this check pass even if nothing was rewritten.
        assert output_content != test_str.encode()
        K.clear_session()

        # Test loading model in function
        openl3.process_audio_file(CHIRP_MONO_PATH, output_dir=test_output_dir,
                                  input_repr="mel256", content_type="music",
                                  embedding_size=512)
        K.clear_session()
    finally:
        shutil.rmtree(test_output_dir)

    # Load a model and pass it in
    model = openl3.models.load_audio_embedding_model("mel256", "music", 512)

    ## Test providing multiple files
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)
    path1 = os.path.join(test_subdir, "chirp_1.wav")
    path2 = os.path.join(test_subdir, "chirp_2.wav")
    shutil.copy(CHIRP_MONO_PATH, path1)
    shutil.copy(CHIRP_MONO_PATH, path2)
    try:
        openl3.process_audio_file([path1, path2], output_dir=test_output_dir,
                                  batch_size=4, model=model)
        exp_output_path1 = os.path.join(test_output_dir, "chirp_1.npz")
        exp_output_path2 = os.path.join(test_output_dir, "chirp_2.npz")
        assert os.path.exists(exp_output_path1)
        assert os.path.exists(exp_output_path2)

        # Close both archives before the directory is removed in ``finally``.
        with np.load(exp_output_path1) as data1, \
                np.load(exp_output_path2) as data2:
            assert 'embedding' in data1
            assert 'timestamps' in data1
            assert 'embedding' in data2
            assert 'timestamps' in data2

            embedding1 = data1['embedding']
            timestamps1 = data1['timestamps']
            embedding2 = data2['embedding']
            timestamps2 = data2['timestamps']

        # Quick sanity check on data
        assert embedding1.ndim == 2
        assert timestamps1.ndim == 1
        assert embedding2.ndim == 2
        assert timestamps2.ndim == 1
    finally:
        shutil.rmtree(test_output_dir)

    # Make sure we fail when file cannot be opened
    pytest.raises(OpenL3Error, openl3.process_audio_file,
                  '/fake/directory/asdf.wav', model=model)
    pytest.raises(OpenL3Error, openl3.process_audio_file, None, model=model)
    K.clear_session()
def run(modality, inputs, output_dir=None, suffix=None,
        input_repr="mel256", content_type="music",
        audio_embedding_size=6144, audio_center=True, audio_hop_size=0.1,
        audio_batch_size=32, image_embedding_size=8192, image_batch_size=32,
        overwrite=False, verbose=False):
    """
    Load the embedding model(s) for a modality and process the given inputs.

    Parameters
    ----------
    modality : str
        Which processing path to take: 'audio', 'image' or 'video'.
    inputs : list of str, or str
        Path or collection of paths to process. A single string is used
        as-is as a one-element file list; any other iterable is expanded
        with ``get_file_list``.
    output_dir : str or None
        Directory for the output files, forwarded to the ``process_*_file``
        helper. Documented upstream as: if None, outputs are saved in the
        directory containing the input file.
    suffix : str or None
        Optional string appended to the output base filename
        (``<base filename>_<suffix>``); forwarded to the helper.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for the model.
    content_type : "music" or "env"
        Type of content used to train the embedding.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : boolean
        If True, pads beginning of signal so timestamps correspond to the
        center of the window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to the audio embedding model.
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to the image embedding model.
    overwrite : bool
        If True, overwrites existing output files.
    verbose : boolean
        If True, print verbose messages.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if
        ``modality`` is not one of the supported values.
    SystemExit
        With status -1 when the resolved file list is empty.
    """
    # Resolve the inputs into a list of files.
    if isinstance(inputs, str):
        targets = [inputs]
    elif isinstance(inputs, Iterable):
        targets = get_file_list(inputs)
    else:
        raise OpenL3Error('Invalid input: {}'.format(str(inputs)))

    if len(targets) == 0:
        print('openl3: No files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Keyword arguments that every modality's processing helper receives.
    shared = dict(output_dir=output_dir, suffix=suffix,
                  overwrite=overwrite, verbose=verbose)

    if modality == 'video':
        # Video needs both models; the audio model is loaded first.
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_video_file(targets,
                           audio_model=audio_model,
                           image_model=image_model,
                           audio_embedding_size=audio_embedding_size,
                           audio_center=audio_center,
                           audio_hop_size=audio_hop_size,
                           audio_batch_size=audio_batch_size,
                           image_batch_size=image_batch_size,
                           image_embedding_size=image_embedding_size,
                           **shared)
    elif modality == 'image':
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)
        process_image_file(targets, model=image_model,
                           batch_size=image_batch_size, **shared)
    elif modality == 'audio':
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        process_audio_file(targets, model=audio_model,
                           center=audio_center, hop_size=audio_hop_size,
                           batch_size=audio_batch_size, **shared)
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
embedding_size=512) return emb if __name__ == "__main__": dir_dev = Path('./data/clotho_audio_files/development') dir_eval = Path('./data/clotho_audio_files/evaluation') output_dir_dev = Path('./data/openl3/development') output_dir_eval = Path('./data/openl3/evaluation') model = openl3.models.load_audio_embedding_model(input_repr="mel128", content_type="env", embedding_size=512) dev_files_list = [] for data_file_name in dir_dev.iterdir(): dev_files_list.append(str(data_file_name)) openl3.process_audio_file(dev_files_list, model=model, output_dir=output_dir_dev, batch_size=16) eval_files_list = [] for data_file_name in dir_eval.iterdir(): eval_files_list.append(str(data_file_name)) openl3.process_audio_file(eval_files_list, model=model, output_dir=output_dir_eval, batch_size=16)