Example #1
def test_audio_to_observations(uiowa_root, onset_root, workspace):
    audio_file = os.path.join(
        uiowa_root, "theremin.music.uiowa.edu/sound files/MIS/Brass/tuba"
                    "/Tuba.ff.C3C4.aiff")
    assert os.path.exists(audio_file)
    index = "uiowa78fae0a0"
    onsets_file = os.path.join(onset_root, 'uiowa', "{}.csv".format(index))
    assert os.path.exists(onsets_file)

    output_dir = os.path.join(workspace, 'notes_tmp')
    utils.create_directory(output_dir)

    fext = 'flac'
    observations = SC.audio_to_observations(
        index, audio_file, onsets_file, output_dir, file_ext=fext,
        instrument='Tuba', dataset='uiowa')

    onset_df = pd.read_csv(onsets_file)
    assert len(observations) == len(onset_df)

    coll = model.Collection(observations, output_dir)
    assert coll.validate(verbose=True)
    for obs in coll.values():
        assert obs.instrument == 'Tuba'

    assert len(set([obs.index for obs in observations])) == len(observations)
def segment_many(index, audio_files, mode, output_directory,
                 num_cpus=-1, verbose=0):
    """Segment a collection of audio files.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!

    audio_files : array_like
        Collection of audio filepaths.

    mode : str
        Segmentation mode, forwarded to `segment_one`.

    output_directory : str
        Path at which outputs will be written.

    num_cpus : int, default=-1
        Number of CPUs to use for parallel segmentation; -1 for all.

    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.
    """
    utils.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus, verbose=verbose)
    fx = delayed(segment_one)
    return pool(fx(idx, afile, mode, output_directory)
                for idx, afile in zip(index, audio_files))
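This first variant simply fans one segment_one call per file out over joblib's Parallel/delayed, so results come back in input order, with None for any file that fails. A minimal usage sketch, assuming segment_many is importable from the surrounding segmentation module; the ids, paths, and mode value below are placeholders, not real dataset entries:

index = ['uiowa78fae0a0', 'uiowa78fae0a1']           # placeholder ids
audio_files = ['/data/audio/Tuba.ff.C3C4.aiff',      # placeholder paths
               '/data/audio/Tuba.ff.C4C5.aiff']

# Dispatch the work to two worker processes; each element of the
# returned list is the output path for the matching input, or None
# if that file could not be segmented.
output_paths = segment_many(index, audio_files,
                            mode='onsets',           # placeholder mode value
                            output_directory='/tmp/segments',
                            num_cpus=2, verbose=1)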
Example #4
def test_audio_collection_to_observations_no_onsets(uiowa_root, workspace):
    audio_file = os.path.join(
        uiowa_root, "theremin.music.uiowa.edu/sound files/MIS/Brass/tuba"
                    "/Tuba.ff.C3C4.aiff")
    assert os.path.exists(audio_file)
    index = "uiowa78fae0a0"

    output_dir = os.path.join(workspace, 'notes_tmp')
    utils.create_directory(output_dir)

    rec = dict(audio_file=audio_file, onsets_file=None,
               instrument="Tuba", dataset='uiowa', dynamic='ff')
    seg_index = pd.DataFrame.from_records([rec], index=[index])
    seg_file = os.path.join(workspace, 'seg_index.csv')
    seg_index.to_csv(seg_file)

    assert SC.audio_collection_to_observations(
        seg_file, 'empty_note_index.csv', output_dir)
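This test exercises the missing-onsets path: inside audio_collection_to_observations, rows without an onsets_file are dropped before any segmentation happens, so the run still succeeds and produces an empty note index. A small sketch of that filtering step, mirroring the record shape used in the test above (paths are placeholders):

import pandas as pd

# Same record shape as the test; onsets_file is deliberately None.
rec = dict(audio_file='/tmp/Tuba.ff.C3C4.aiff', onsets_file=None,
           instrument='Tuba', dataset='uiowa', dynamic='ff')
seg_df = pd.DataFrame.from_records([rec], index=['uiowa78fae0a0'])

# The same filter audio_collection_to_observations applies: keep only
# rows whose onsets_file is non-null.
seg_df = seg_df.loc[seg_df.onsets_file.dropna().index]
print(len(seg_df))  # 0 -- nothing left to segment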
def segment_many(index,
                 audio_files,
                 mode,
                 output_directory,
                 num_cpus=-1,
                 verbose=0):
    """Segment a collection of audio files.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!

    audio_files : array_like
        Collection of audio filepaths.

    mode : str
        Segmentation mode, forwarded to `segment_one`.

    output_directory : str
        Path at which outputs will be written.

    num_cpus : int, default=-1
        Number of CPUs to use for parallel segmentation; -1 for all.

    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.
    """
    if len(set(index)) < len(index):
        raise ValueError("All index values must be unique.")

    logger.info("beginning segmenting {} files with mode={}"
                "".format(len(index), mode))
    utils.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus, verbose=verbose)
    fx = delayed(segment_one)
    return pool(
        fx(idx, afile, mode, output_directory)
        for idx, afile in zip(index, audio_files))
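Unlike the first variant, this one validates the index before dispatching any work, because each output file is keyed by its index value. A short sketch of the failure mode; the paths and mode value are placeholders:

# Duplicate index values are rejected up front, before any audio is
# read, so no ambiguous output files are ever written.
try:
    segment_many(['dup', 'dup'],
                 ['/tmp/a.aiff', '/tmp/b.aiff'],
                 mode='onsets',                      # placeholder mode value
                 output_directory='/tmp/segments')
except ValueError as err:
    print(err)  # All index values must be unique.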
def audio_collection_to_observations(segment_index_file, note_index_file,
                                     note_audio_dir, limit_n_files=None,
                                     note_duration=None):
    """
    Parameters
    ----------
    segment_index_file : str
        Input file containing all pointers to audio files and
        onsets files.

    note_index_file : str
        Path to the output index file which will contain pointers
        to the output note audio, and the metadata relating to it.

    note_audio_dir : str
        Directory in which the resulting note audio files are written.

    limit_n_files : int, default=None
        If given, stop after segmenting this many input records.

    note_duration : float, default=None
        Optional note duration, passed through to `audio_to_observations`.

    Returns
    -------
    success : bool
        True if the method completed as expected.
    """
    logger.info("Begin audio collection segmentation")
    logger.debug("Loading segment index")
    segment_df = pd.read_csv(segment_index_file, index_col=0)
    logger.debug("loaded {} records.".format(len(segment_df)))

    if segment_df.empty:
        logger.warning(utils.colorize(
            "No data available in {}; exiting.".format(segment_index_file),
            color='red'))
        # Return True here so the makefile will continue to build
        # other datasets, even if this one is empty.
        return True

    # Drop rows that do not have onsets_files.
    segment_df = segment_df.loc[segment_df.onsets_file.dropna().index]
    utils.create_directory(note_audio_dir)
    count = 0
    observations = []
    for idx, row in segment_df.iterrows():
        if pd.isnull(row.onsets_file):
            logger.warning("No onset file for {} [{}]; moving on.".format(
                row.audio_file, row.dataset))
            continue
        observations += audio_to_observations(
            idx, row.audio_file, row.onsets_file, note_audio_dir,
            file_ext='flac', dataset=row.dataset, instrument=row.instrument,
            dynamic=row.dynamic, note_duration=note_duration)
        logger.debug("Generated {} observations ({} of {}).".format(
            len(observations), (count + 1), len(segment_df)))

        if PRINT_PROGRESS:
            print("Progress: {:0.1f}% ({} of {})\r".format(
                (((count + 1) / float(len(segment_df))) * 100.),
                (count + 1), len(segment_df)), end='')
            sys.stdout.flush()
        count += 1

        if limit_n_files and count >= limit_n_files:
            break

    if PRINT_PROGRESS:
        print()

    collection = model.Collection(observations)
    collection.to_dataframe().to_csv(note_index_file)
    logger.debug("Wrote note index to {} with {} records".format(
        note_index_file, len(collection)))
    logger.info("Completed audio collection segmentation")
    return os.path.exists(note_index_file)
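Taken together with the test in Example #4, the expected input is a segment index CSV with one row per source recording, keyed by a unique id and carrying at least audio_file, onsets_file, instrument, dataset, and dynamic columns. A hedged end-to-end sketch with placeholder paths (real paths come from the dataset layout), assuming audio_collection_to_observations is importable:

import os
import pandas as pd

workspace = '/tmp/notes_workspace'                   # placeholder workspace
os.makedirs(workspace, exist_ok=True)

rec = dict(audio_file='/data/uiowa/Tuba.ff.C3C4.aiff',
           onsets_file='/data/onsets/uiowa78fae0a0.csv',
           instrument='Tuba', dataset='uiowa', dynamic='ff')
seg_index = pd.DataFrame.from_records([rec], index=['uiowa78fae0a0'])

seg_file = os.path.join(workspace, 'seg_index.csv')
note_index_file = os.path.join(workspace, 'note_index.csv')
note_audio_dir = os.path.join(workspace, 'notes')
seg_index.to_csv(seg_file)

# Segments every listed recording into per-note flac files, writes the
# note index CSV, and returns True once that index exists on disk.
ok = audio_collection_to_observations(seg_file, note_index_file,
                                      note_audio_dir)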