Code example #1
File: cli.py  Project: Bomme/openl3
# Imports assumed for this excerpt (the helpers below come from the openl3 package;
# get_file_list is a helper defined elsewhere in cli.py):
import sys
from collections.abc import Iterable

from openl3 import process_audio_file, process_image_file, process_video_file
from openl3.models import load_audio_embedding_model, load_image_embedding_model
from openl3.openl3_exceptions import OpenL3Error


def run(modality,
        inputs,
        output_dir=None,
        suffix=None,
        input_repr="mel256",
        content_type="music",
        audio_embedding_size=6144,
        audio_center=True,
        audio_hop_size=0.1,
        audio_batch_size=32,
        image_embedding_size=8192,
        image_batch_size=32,
        overwrite=False,
        verbose=False):
    """
    Computes and saves L3 embedding for given inputs.

    Parameters
    ----------
    modality : str
        String to specify the modality to be processed: audio, image, or video.
    inputs : list of str, or str
        File/directory path or list of file/directory paths to be processed
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename, i.e. <base filename>_<suffix>.npy.
        If None, then no suffix will be added, i.e. <base filename>.npy.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality.
    audio_center : boolean
        If True, pads beginning of signal so timestamps correspond
        to center of window.
    audio_hop_size : float
        Hop size in seconds.
    audio_batch_size : int
        Batch size used for input to audio embedding model.
    image_embedding_size : 8192 or 512
        Image embedding dimensionality.
    image_batch_size : int
        Batch size used for input to image embedding model.
    overwrite : bool
        If True, overwrites existing output files.
    verbose : boolean
        If True, print verbose messages.

    Returns
    -------
    """

    if isinstance(inputs, str):
        file_list = [inputs]
    elif isinstance(inputs, Iterable):
        file_list = get_file_list(inputs)
    else:
        raise OpenL3Error('Invalid input: {}'.format(str(inputs)))

    if len(file_list) == 0:
        print('openl3: No files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Load model
    if modality == 'audio':
        model = load_audio_embedding_model(input_repr, content_type,
                                           audio_embedding_size)

        # Process all files in the arguments
        process_audio_file(file_list,
                           output_dir=output_dir,
                           suffix=suffix,
                           model=model,
                           center=audio_center,
                           hop_size=audio_hop_size,
                           batch_size=audio_batch_size,
                           overwrite=overwrite,
                           verbose=verbose)
    elif modality == 'image':
        model = load_image_embedding_model(input_repr, content_type,
                                           image_embedding_size)

        # Process all files in the arguments
        process_image_file(file_list,
                           output_dir=output_dir,
                           suffix=suffix,
                           model=model,
                           batch_size=image_batch_size,
                           overwrite=overwrite,
                           verbose=verbose)
    elif modality == 'video':
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)

        # Process all files in the arguments
        process_video_file(file_list,
                           output_dir=output_dir,
                           suffix=suffix,
                           audio_model=audio_model,
                           image_model=image_model,
                           audio_embedding_size=audio_embedding_size,
                           audio_center=audio_center,
                           audio_hop_size=audio_hop_size,
                           audio_batch_size=audio_batch_size,
                           image_batch_size=image_batch_size,
                           image_embedding_size=image_embedding_size,
                           overwrite=overwrite,
                           verbose=verbose)
    else:
        raise OpenL3Error('Invalid modality: {}'.format(modality))

    if verbose:
        print('openl3: Done!')
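
As a rough usage sketch of the function above: assuming it is importable as openl3.cli.run, and using hypothetical input files, a call that computes 512-dimensional audio embeddings could look like the following. Output naming follows the docstring above (<base filename>_<suffix> in the given output directory).

from openl3.cli import run

# Hypothetical input files; a list of file/directory paths goes through get_file_list.
run('audio',
    ['clip1.wav', 'clip2.wav'],
    output_dir='embeddings/',      # results written here instead of next to the inputs
    suffix='openl3',               # output name becomes <base filename>_openl3.*
    input_repr='mel256',
    content_type='music',
    audio_embedding_size=512,
    audio_hop_size=0.5,
    verbose=True)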
Code example #2
File: test_core.py  Project: marl/openl3
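# Note: this excerpt relies on module-level imports in test_core.py (os, shutil,
# tempfile, numpy as np, pytest, openl3, OpenL3Error, and K for the Keras backend)
# and on the BENTO_PATH constant pointing at the bundled test video.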
def test_process_video_file():
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)

    # Load models
    audio_model = openl3.models.load_audio_embedding_model("mel256", "music", 512)
    image_model = openl3.models.load_image_embedding_model("mel256", "music", 512)

    # Make a copy of the file so we can test the case where we save to the same directory
    input_path_alt = os.path.join(test_subdir, "bento.mp4")
    shutil.copy(BENTO_PATH, test_subdir)

    invalid_file_path = os.path.join(test_subdir, "invalid.mp4")
    with open(invalid_file_path, 'w') as f:
        f.write('This is not a video file.')

    exp_audio_output_path1 = os.path.join(test_output_dir, "bento_audio.npz")
    exp_audio_output_path2 = os.path.join(test_output_dir, "bento_audio_suffix.npz")
    exp_audio_output_path3 = os.path.join(test_subdir, "bento_audio.npz")
    exp_image_output_path1 = os.path.join(test_output_dir, "bento_image.npz")
    exp_image_output_path2 = os.path.join(test_output_dir, "bento_image_suffix.npz")
    exp_image_output_path3 = os.path.join(test_subdir, "bento_image.npz")
    try:
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model,
                                  image_model=image_model)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  suffix='suffix',
                                  audio_model=audio_model,
                                  image_model=image_model)
        openl3.process_video_file(input_path_alt, audio_model=audio_model,
                                  image_model=image_model)

        # Make sure we fail when invalid files are provided
        pytest.raises(OpenL3Error, openl3.process_video_file, invalid_file_path,
                      audio_model=audio_model, image_model=image_model)

        # Make sure paths all exist
        assert os.path.exists(exp_audio_output_path1)
        assert os.path.exists(exp_audio_output_path2)
        assert os.path.exists(exp_audio_output_path3)
        assert os.path.exists(exp_image_output_path1)
        assert os.path.exists(exp_image_output_path2)
        assert os.path.exists(exp_image_output_path3)

        audio_data = np.load(exp_audio_output_path1)
        assert 'embedding' in audio_data
        assert 'timestamps' in audio_data

        audio_embedding = audio_data['embedding']
        audio_timestamps = audio_data['timestamps']

        # Quick sanity check on data
        assert audio_embedding.ndim == 2
        assert audio_timestamps.ndim == 1

        image_data = np.load(exp_image_output_path1)
        assert 'embedding' in image_data
        assert 'timestamps' in image_data

        image_embedding = image_data['embedding']
        image_timestamps = image_data['timestamps']

        # Quick sanity check on data
        assert image_embedding.ndim == 2
        assert image_timestamps.ndim == 1

        # Test overwriting
        test_str = "this is a test file"
        with open(exp_audio_output_path1, 'w') as f:
            f.write(test_str)
        with open(exp_image_output_path1, 'w') as f:
            f.write(test_str)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model,
                                  image_model=image_model,
                                  overwrite=False)
        with open(exp_audio_output_path1, 'r') as f:
            audio_output_content = f.read()
        with open(exp_image_output_path1, 'r') as f:
            image_output_content = f.read()
        # File should not be overwritten
        assert audio_output_content == test_str
        assert image_output_content == test_str

        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model,
                                  image_model=image_model,
                                  overwrite=True)
        with open(exp_audio_output_path1, 'rb') as f:
            audio_output_content = f.read()
        with open(exp_image_output_path1, 'rb') as f:
            image_output_content = f.read()
        # File should be overwritten
        assert audio_output_content != test_str
        assert image_output_content != test_str

        with open(exp_audio_output_path1, 'w') as f:
            f.write(test_str)
        os.remove(exp_image_output_path1)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model,
                                  image_model=image_model,
                                  overwrite=False)
        with open(exp_audio_output_path1, 'r') as f:
            audio_output_content = f.read()
        with open(exp_image_output_path1, 'rb') as f:
            image_output_content = f.read()
        # Audio output should not be overwritten
        assert audio_output_content == test_str
        assert image_output_content != test_str.encode()

        with open(exp_image_output_path1, 'w') as f:
            f.write(test_str)
        os.remove(exp_audio_output_path1)
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  audio_model=audio_model,
                                  image_model=image_model,
                                  overwrite=False)
        with open(exp_audio_output_path1, 'rb') as f:
            audio_output_content = f.read()
        with open(exp_image_output_path1, 'r') as f:
            image_output_content = f.read()
        # Image output should not be overwritten
        assert audio_output_content != test_str.encode()
        assert image_output_content == test_str

        K.clear_session()

        # Test loading model in function
        openl3.process_video_file(BENTO_PATH, output_dir=test_output_dir,
                                  input_repr="mel256",
                                  content_type="music",
                                  audio_embedding_size=512,
                                  image_embedding_size=512)
        K.clear_session()
    finally:
        shutil.rmtree(test_output_dir)

    # Load models
    audio_model = openl3.models.load_audio_embedding_model("mel256", "music", 512)
    image_model = openl3.models.load_image_embedding_model("mel256", "music", 512)

    ## Test providing multiple files
    test_output_dir = tempfile.mkdtemp()
    test_subdir = os.path.join(test_output_dir, "subdir")
    os.makedirs(test_subdir)
    path1 = os.path.join(test_subdir, "bento_1.wav")
    path2 = os.path.join(test_subdir, "bento_2.wav")
    shutil.copy(BENTO_PATH, path1)
    shutil.copy(BENTO_PATH, path2)

    try:
        openl3.process_video_file([path1, path2], output_dir=test_output_dir,
                                  audio_batch_size=4, image_batch_size=4,
                                  audio_model=audio_model,
                                  image_model=image_model)
        exp_output_audio_path1 = os.path.join(test_output_dir, "bento_1_audio.npz")
        exp_output_audio_path2 = os.path.join(test_output_dir, "bento_2_audio.npz")
        exp_output_image_path1 = os.path.join(test_output_dir, "bento_1_image.npz")
        exp_output_image_path2 = os.path.join(test_output_dir, "bento_2_image.npz")
        assert os.path.exists(exp_output_audio_path1)
        assert os.path.exists(exp_output_audio_path2)
        assert os.path.exists(exp_output_image_path1)
        assert os.path.exists(exp_output_image_path2)

        audio_data1 = np.load(exp_output_audio_path1)
        audio_data2 = np.load(exp_output_audio_path2)
        image_data1 = np.load(exp_output_image_path1)
        image_data2 = np.load(exp_output_image_path2)
        assert 'embedding' in audio_data1
        assert 'timestamps' in audio_data1
        assert 'embedding' in audio_data2
        assert 'timestamps' in audio_data2
        assert 'embedding' in image_data1
        assert 'timestamps' in image_data1
        assert 'embedding' in image_data2
        assert 'timestamps' in image_data2

        audio_embedding1 = audio_data1['embedding']
        audio_timestamps1 = audio_data1['timestamps']
        audio_embedding2 = audio_data2['embedding']
        audio_timestamps2 = audio_data2['timestamps']
        image_embedding1 = image_data1['embedding']
        image_timestamps1 = image_data1['timestamps']
        image_embedding2 = image_data2['embedding']
        image_timestamps2 = image_data2['timestamps']

        # Quick sanity check on data
        assert audio_embedding1.ndim == 2
        assert audio_timestamps1.ndim == 1
        assert audio_embedding2.ndim == 2
        assert audio_timestamps2.ndim == 1
        assert image_embedding1.ndim == 2
        assert image_timestamps1.ndim == 1
        assert image_embedding2.ndim == 2
        assert image_timestamps2.ndim == 1

    finally:
        shutil.rmtree(test_output_dir)

    # Make sure we fail when file cannot be opened
    pytest.raises(OpenL3Error, openl3.process_video_file,
                  '/fake/directory/asdf.mp4', audio_model=audio_model,
                  image_model=image_model)
    pytest.raises(OpenL3Error, openl3.process_video_file, None,
                  audio_model=audio_model, image_model=image_model)
    K.clear_session()
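
For reference, the assertions above describe the saved output: each <base filename>_audio.npz / <base filename>_image.npz archive holds a 2-D 'embedding' array and a 1-D 'timestamps' array. A minimal sketch of reading one back, using a hypothetical output path:

import numpy as np

# Hypothetical file produced by process_video_file(..., output_dir='out/')
data = np.load('out/bento_audio.npz')
embedding = data['embedding']    # 2-D array: (n_frames, embedding_size)
timestamps = data['timestamps']  # 1-D array: (n_frames,)
print(embedding.shape, timestamps.shape)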