def run(modality, inputs, output_dir=None, suffix=None,
        input_repr="mel256", content_type="music",
        audio_embedding_size=6144, audio_center=True, audio_hop_size=0.1,
        audio_batch_size=32, image_embedding_size=8192, image_batch_size=32,
        overwrite=False, verbose=False):
    """
    Compute L3 embeddings for the given inputs and save the results.

    The embedding model(s) for the requested modality are loaded and the
    resolved file list is handed to the matching ``process_*_file``
    routine.

    Parameters
    ----------
    modality : str
        Which modality to process: ``'audio'``, ``'image'`` or ``'video'``.
    inputs : list of str, or str
        File/directory path or list of file/directory paths to process.
        A single string is used as-is as the only entry of the file list;
        any other iterable is expanded through ``get_file_list``.
    output_dir : str or None
        Directory in which output files are saved. If None, output files
        are saved next to the corresponding input file.
    suffix : str or None
        String appended to the output filename, i.e.
        ``<base filename>_<suffix>.npy``. If None, no suffix is added,
        i.e. ``<base filename>.npy``.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for the model.
    content_type : "music" or "env"
        Type of content used to train the embedding.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : bool
        If True, pads the beginning of the signal so timestamps correspond
        to the center of each window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to the audio embedding model.
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to the image embedding model.
    overwrite : bool
        If True, overwrites existing output files.
    verbose : bool
        If True, print verbose messages.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if
        ``modality`` is not one of the supported values.
    SystemExit
        If the resolved file list is empty (``sys.exit(-1)`` is called
        after printing a message).
    """
    # Resolve the inputs into a list of files to process.
    if isinstance(inputs, str):
        file_list = [inputs]
    else:
        if not isinstance(inputs, Iterable):
            raise OpenL3Error('Invalid input: {}'.format(str(inputs)))
        file_list = get_file_list(inputs)

    if len(file_list) == 0:
        abort_message = 'openl3: No files found in {}. Aborting.'.format(
            str(inputs))
        print(abort_message)
        sys.exit(-1)

    # Keyword arguments accepted by every process_*_file routine.
    shared_options = dict(output_dir=output_dir, suffix=suffix,
                          overwrite=overwrite, verbose=verbose)

    if modality == 'video':
        # Video needs both models; the audio model is loaded first.
        audio_model = load_audio_embedding_model(
            input_repr, content_type, audio_embedding_size)
        image_model = load_image_embedding_model(
            input_repr, content_type, image_embedding_size)
        process_video_file(file_list,
                           audio_model=audio_model,
                           image_model=image_model,
                           audio_embedding_size=audio_embedding_size,
                           audio_center=audio_center,
                           audio_hop_size=audio_hop_size,
                           audio_batch_size=audio_batch_size,
                           image_batch_size=image_batch_size,
                           image_embedding_size=image_embedding_size,
                           **shared_options)
    elif modality == 'image':
        image_model = load_image_embedding_model(
            input_repr, content_type, image_embedding_size)
        process_image_file(file_list,
                           model=image_model,
                           batch_size=image_batch_size,
                           **shared_options)
    elif modality == 'audio':
        audio_model = load_audio_embedding_model(
            input_repr, content_type, audio_embedding_size)
        process_audio_file(file_list,
                           model=audio_model,
                           center=audio_center,
                           hop_size=audio_hop_size,
                           batch_size=audio_batch_size,
                           **shared_options)
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
def test_get_image_embedding_model(input_repr, content_type, embedding_size, ref_image_model):
    """
    Load an image embedding model and compare it against a reference model.

    The model for ``(input_repr, content_type, embedding_size)`` is loaded
    and passed to ``_compare_models`` together with ``ref_image_model``,
    the entry of ``IMAGE_INPUT_REPR_SIZES`` for ``input_repr`` and the
    requested ``embedding_size``.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization / fixtures declared outside this view -- confirm.
    """
    expected_input_size = IMAGE_INPUT_REPR_SIZES[input_repr]
    loaded_model = load_image_embedding_model(
        input_repr, content_type, embedding_size)
    _compare_models(loaded_model, ref_image_model,
                    expected_input_size, embedding_size)
def ref_image_model():
    """
    Build the reference image embedding model.

    Loads the ``'linear'`` / ``'music'`` image model with an embedding
    size of 8192, checks that the second entry of its output shape equals
    that size, and returns the model.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    outside this view -- confirm.
    """
    expected_size = 8192
    reference = load_image_embedding_model('linear', 'music', expected_size)
    assert reference.output_shape[1] == expected_size
    return reference
def test_load_image_embedding_model():
    """
    Check every image embedding model configuration against the first one.

    Models are loaded for each combination of input representation
    (``linear``, ``mel128``, ``mel256``), content type (``music``, ``env``)
    and embedding size (8192, 512). Each model's output shape must report
    the requested embedding size. The first configuration
    (``linear`` / ``music`` / 8192) serves as the reference: every later
    model must have the same number of layers, and each of its layers must
    be an instance of the type of the reference layer at the same position.
    """
    # Same order as loading each configuration one after another by hand:
    # embedding size varies fastest, then content type, then input repr.
    configurations = [
        (input_repr, content_type, embedding_size)
        for input_repr in ('linear', 'mel128', 'mel256')
        for content_type in ('music', 'env')
        for embedding_size in (8192, 512)
    ]

    reference_repr, reference_content, reference_size = configurations[0]
    first_model = load_image_embedding_model(
        reference_repr, reference_content, reference_size)
    assert first_model.output_shape[1] == reference_size

    for input_repr, content_type, embedding_size in configurations[1:]:
        candidate = load_image_embedding_model(
            input_repr, content_type, embedding_size)
        assert candidate.output_shape[1] == embedding_size
        assert len(candidate.layers) == len(first_model.layers)
        assert all([
            isinstance(layer, type(reference_layer))
            for (layer, reference_layer)
            in zip(candidate.layers, first_model.layers)
        ])