def run(
    modality,
    inputs,
    output_dir=None,
    suffix=None,
    input_repr="mel256",
    content_type="music",
    audio_embedding_size=6144,
    audio_center=True,
    audio_hop_size=0.1,
    audio_batch_size=32,
    image_embedding_size=8192,
    image_batch_size=32,
    overwrite=False,
    verbose=False,
):
    """
    Compute and save L3 embeddings for the given inputs.

    Parameters
    ----------
    modality : str
        Which modality to process: ``'audio'``, ``'image'`` or ``'video'``.
    inputs : str, or iterable of str
        A single path, or a collection of file/directory paths. A
        collection is resolved with ``get_file_list``; a single string is
        used as-is (it is wrapped in a one-element list and is *not* passed
        through ``get_file_list``).
    output_dir : str or None
        Directory in which output files are saved. If None, output files
        are saved next to the corresponding input file.
    suffix : str or None
        String appended to the output filename, i.e.
        ``<base filename>_<suffix>.npy``. If None, no suffix is added.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for the model.
    content_type : "music" or "env"
        Type of content used to train the embedding.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : bool
        If True, pads the beginning of the signal so timestamps correspond
        to the center of the window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to the audio embedding model.
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to the image embedding model.
    overwrite : bool
        If True, overwrites existing output files.
    verbose : bool
        If True, print verbose messages.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if
        ``modality`` is not one of the supported values.
    SystemExit
        If the resolved file list is empty (``sys.exit(-1)`` is called after
        printing a message).
    """
    # Resolve the inputs into a list of paths to process.
    if isinstance(inputs, str):
        paths = [inputs]
    elif not isinstance(inputs, Iterable):
        raise OpenL3Error('Invalid input: {}'.format(str(inputs)))
    else:
        paths = get_file_list(inputs)

    if len(paths) == 0:
        print('openl3: No files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Keyword arguments common to every per-modality processing function.
    shared_kwargs = dict(
        output_dir=output_dir,
        suffix=suffix,
        overwrite=overwrite,
        verbose=verbose,
    )

    # Load the model(s) for the requested modality, then process all files.
    if modality == 'audio':
        audio_model = load_audio_embedding_model(
            input_repr, content_type, audio_embedding_size)
        process_audio_file(
            paths,
            model=audio_model,
            center=audio_center,
            hop_size=audio_hop_size,
            batch_size=audio_batch_size,
            **shared_kwargs
        )
    elif modality == 'image':
        image_model = load_image_embedding_model(
            input_repr, content_type, image_embedding_size)
        process_image_file(
            paths,
            model=image_model,
            batch_size=image_batch_size,
            **shared_kwargs
        )
    elif modality == 'video':
        audio_model = load_audio_embedding_model(
            input_repr, content_type, audio_embedding_size)
        image_model = load_image_embedding_model(
            input_repr, content_type, image_embedding_size)
        process_video_file(
            paths,
            audio_model=audio_model,
            image_model=image_model,
            audio_embedding_size=audio_embedding_size,
            audio_center=audio_center,
            audio_hop_size=audio_hop_size,
            audio_batch_size=audio_batch_size,
            image_batch_size=image_batch_size,
            image_embedding_size=image_embedding_size,
            **shared_kwargs
        )
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
def _assert_valid_embedding_file(npz_path):
    """Assert that an output ``.npz`` file holds a 2-D embedding and 1-D timestamps."""
    # np.load on an .npz archive keeps the file open until it is closed, so
    # use it as a context manager; the test later overwrites/removes the file.
    with np.load(npz_path) as data:
        assert 'embedding' in data
        assert 'timestamps' in data
        embedding = data['embedding']
        timestamps = data['timestamps']

    # Quick sanity check on data
    assert embedding.ndim == 2
    assert timestamps.ndim == 1


def _write_text(path, text):
    """Replace the contents of ``path`` with ``text``."""
    with open(path, 'w') as f:
        f.write(text)


def _read_bytes(path):
    """Return the raw contents of ``path``."""
    with open(path, 'rb') as f:
        return f.read()


def test_process_video_file():
    """Exercise ``openl3.process_video_file``.

    Covers: saving to an explicit output directory (with and without a
    suffix), saving next to the input file, failure on an invalid file,
    the ``overwrite`` flag for audio and image outputs (together and
    independently), loading the models inside the function, processing
    a list of files, and failure on unopenable / ``None`` inputs.
    """
    # Load models
    audio_model = openl3.models.load_audio_embedding_model("mel256", "music", 512)
    image_model = openl3.models.load_image_embedding_model("mel256", "music", 512)

    test_output_dir = tempfile.mkdtemp()
    # Everything that touches the temp dir lives inside the try block so the
    # directory is removed even if the setup itself fails.
    try:
        test_subdir = os.path.join(test_output_dir, "subdir")
        os.makedirs(test_subdir)

        # Make a copy of the file so we can test the case where we save to
        # the same directory
        input_path_alt = os.path.join(test_subdir, "bento.mp4")
        shutil.copy(BENTO_PATH, test_subdir)

        invalid_file_path = os.path.join(test_subdir, "invalid.mp4")
        _write_text(invalid_file_path, 'This is not an video file.')

        exp_audio_output_path1 = os.path.join(test_output_dir, "bento_audio.npz")
        exp_audio_output_path2 = os.path.join(test_output_dir, "bento_audio_suffix.npz")
        exp_audio_output_path3 = os.path.join(test_subdir, "bento_audio.npz")
        exp_image_output_path1 = os.path.join(test_output_dir, "bento_image.npz")
        exp_image_output_path2 = os.path.join(test_output_dir, "bento_image_suffix.npz")
        exp_image_output_path3 = os.path.join(test_subdir, "bento_image.npz")

        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model, image_model=image_model)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  suffix='suffix',
                                  audio_model=audio_model, image_model=image_model)
        openl3.process_video_file(input_path_alt,
                                  audio_model=audio_model, image_model=image_model)

        # Make sure we fail when invalid files are provided
        pytest.raises(OpenL3Error, openl3.process_video_file, invalid_file_path,
                      audio_model=audio_model, image_model=image_model)

        # Make sure paths all exist
        for expected_path in (exp_audio_output_path1, exp_audio_output_path2,
                              exp_audio_output_path3, exp_image_output_path1,
                              exp_image_output_path2, exp_image_output_path3):
            assert os.path.exists(expected_path)

        _assert_valid_embedding_file(exp_audio_output_path1)
        _assert_valid_embedding_file(exp_image_output_path1)

        # Test overwriting
        test_str = "this is a test file"
        # Files are read back in binary mode (real outputs are .npz archives),
        # so compare against the encoded marker. Comparing bytes with the str
        # itself is always unequal and would make the checks vacuous.
        test_bytes = test_str.encode()

        _write_text(exp_audio_output_path1, test_str)
        _write_text(exp_image_output_path1, test_str)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model, image_model=image_model,
                                  overwrite=False)
        # File should not be overwritten
        assert _read_bytes(exp_audio_output_path1) == test_bytes
        assert _read_bytes(exp_image_output_path1) == test_bytes

        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model, image_model=image_model,
                                  overwrite=True)
        # File should be overwritten
        assert _read_bytes(exp_audio_output_path1) != test_bytes
        assert _read_bytes(exp_image_output_path1) != test_bytes

        # Only the audio output pre-exists: it must be kept, while the
        # missing image output is (re)generated.
        _write_text(exp_audio_output_path1, test_str)
        os.remove(exp_image_output_path1)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model, image_model=image_model,
                                  overwrite=False)
        # Audio output should not be overwritten
        assert _read_bytes(exp_audio_output_path1) == test_bytes
        assert _read_bytes(exp_image_output_path1) != test_bytes

        # Only the image output pre-exists: it must be kept, while the
        # missing audio output is (re)generated.
        _write_text(exp_image_output_path1, test_str)
        os.remove(exp_audio_output_path1)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model, image_model=image_model,
                                  overwrite=False)
        # Image output should not be overwritten
        assert _read_bytes(exp_audio_output_path1) != test_bytes
        assert _read_bytes(exp_image_output_path1) == test_bytes

        K.clear_session()

        # Test loading model in function
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  input_repr="mel256", content_type="music",
                                  audio_embedding_size=512,
                                  image_embedding_size=512)
        K.clear_session()
    finally:
        shutil.rmtree(test_output_dir)

    # Load models (again: the session was cleared above)
    audio_model = openl3.models.load_audio_embedding_model("mel256", "music", 512)
    image_model = openl3.models.load_image_embedding_model("mel256", "music", 512)

    ## Test providing multiple files
    test_output_dir = tempfile.mkdtemp()
    try:
        test_subdir = os.path.join(test_output_dir, "subdir")
        os.makedirs(test_subdir)
        # NOTE(review): the video fixture is copied under a ".wav" name;
        # presumably the loader does not rely on the extension -- confirm
        # whether ".mp4" was intended here.
        path1 = os.path.join(test_subdir, "bento_1.wav")
        path2 = os.path.join(test_subdir, "bento_2.wav")
        shutil.copy(BENTO_PATH, path1)
        shutil.copy(BENTO_PATH, path2)

        openl3.process_video_file([path1, path2], output_dir=test_output_dir,
                                  audio_batch_size=4, image_batch_size=4,
                                  audio_model=audio_model, image_model=image_model)

        exp_output_paths = [
            os.path.join(test_output_dir, "bento_1_audio.npz"),
            os.path.join(test_output_dir, "bento_2_audio.npz"),
            os.path.join(test_output_dir, "bento_1_image.npz"),
            os.path.join(test_output_dir, "bento_2_image.npz"),
        ]

        for expected_path in exp_output_paths:
            assert os.path.exists(expected_path)

        for expected_path in exp_output_paths:
            _assert_valid_embedding_file(expected_path)
    finally:
        shutil.rmtree(test_output_dir)

    # Make sure we fail when file cannot be opened
    pytest.raises(OpenL3Error, openl3.process_video_file, '/fake/directory/asdf.mp4',
                  audio_model=audio_model, image_model=image_model)
    pytest.raises(OpenL3Error, openl3.process_video_file, None,
                  audio_model=audio_model, image_model=image_model)
    K.clear_session()