def test_audio_to_observations(uiowa_root, onset_root, workspace):
    """Segment a known UIowa tuba recording against its onsets file and
    verify the resulting observations are complete, valid, and unique.
    """
    audio_file = os.path.join(
        uiowa_root, "theremin.music.uiowa.edu/sound files/MIS/Brass/tuba"
        "/Tuba.ff.C3C4.aiff")
    assert os.path.exists(audio_file)

    index = "uiowa78fae0a0"
    onsets_file = os.path.join(onset_root, 'uiowa', "{}.csv".format(index))
    assert os.path.exists(onsets_file)

    output_dir = os.path.join(workspace, 'notes_tmp')
    utils.create_directory(output_dir)
    fext = 'flac'

    observations = SC.audio_to_observations(
        index, audio_file, onsets_file, output_dir, file_ext=fext,
        instrument='Tuba', dataset='uiowa')

    # Expect exactly one observation per onset row.
    onset_df = pd.read_csv(onsets_file)
    assert len(observations) == len(onset_df)

    coll = model.Collection(observations, output_dir)
    assert coll.validate(verbose=True)
    # BUG FIX: this was previously a bare comparison (no `assert`),
    # so the instrument check was a silent no-op.
    for obs in coll.values():
        assert obs.instrument == 'Tuba'
    # Every observation must carry a unique index.
    assert len(set([obs.index for obs in observations])) == len(observations)
def segment_many(index, audio_files, mode, output_directory,
                 num_cpus=-1, verbose=0):
    """Segment a collection of audio files.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!
    audio_files : array_like
        Collection of audio filepaths.
    mode : str
        Segmentation mode, forwarded to `segment_one`.
    output_directory : str
        Path at which outputs will be written.
    num_cpus : int, default=-1
        Number of CPUs to use for parallel downloads; -1 for all.
    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.

    Raises
    ------
    ValueError
        If the given index values are not unique.
    """
    # Duplicate index values would make per-index outputs collide, so
    # reject them up front (consistent with the other segment_many copies).
    if len(set(index)) < len(index):
        raise ValueError("All index values must be unique.")

    utils.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus, verbose=verbose)
    fx = delayed(segment_one)
    return pool(fx(idx, afile, mode, output_directory)
                for idx, afile in zip(index, audio_files))
def segment_many(index, audio_files, mode, output_directory,
                 num_cpus=-1, verbose=0):
    """Segment a collection of audio files.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!
    audio_files : array_like
        Collection of audio filepaths.
    mode : str
        Segmentation mode, forwarded to `segment_one`.
    output_directory : str
        Path at which outputs will be written.
    num_cpus : int, default=-1
        Number of CPUs to use for parallel downloads; -1 for all.
    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.

    Raises
    ------
    ValueError
        If the given index values are not unique.
    """
    # Guard against duplicate index values, which would cause output
    # collisions (matches the validating variants of this function).
    if len(set(index)) < len(index):
        raise ValueError("All index values must be unique.")

    utils.create_directory(output_directory)
    pool = Parallel(n_jobs=num_cpus, verbose=verbose)
    fx = delayed(segment_one)
    return pool(
        fx(idx, afile, mode, output_directory)
        for idx, afile in zip(index, audio_files))
def test_audio_collection_to_observations_no_onsets(uiowa_root, workspace):
    """A segment index whose only record lacks an onsets file should
    still let collection segmentation complete successfully.
    """
    source_audio = os.path.join(
        uiowa_root, "theremin.music.uiowa.edu/sound files/MIS/Brass/tuba"
        "/Tuba.ff.C3C4.aiff")
    assert os.path.exists(source_audio)

    record_id = "uiowa78fae0a0"
    notes_dir = os.path.join(workspace, 'notes_tmp')
    utils.create_directory(notes_dir)

    # Build a one-row segment index with onsets_file deliberately unset.
    record = {
        'audio_file': source_audio,
        'onsets_file': None,
        'instrument': "Tuba",
        'dataset': 'uiowa',
        'dynamic': 'ff',
    }
    frame = pd.DataFrame.from_records([record], index=[record_id])
    index_path = os.path.join(workspace, 'seg_index.csv')
    frame.to_csv(index_path)

    assert SC.audio_collection_to_observations(
        index_path, 'empty_note_index.csv', notes_dir)
def segment_many(index, audio_files, mode, output_directory,
                 num_cpus=-1, verbose=0):
    """Run `segment_one` over a batch of audio files in parallel.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!
    audio_files : array_like
        Collection of audio filepaths.
    output_directory : str
        Path at which outputs will be written.
    num_cpus : int, default=-1
        Number of CPUs to use for parallel downloads; -1 for all.
    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.
    """
    if len(set(index)) != len(index):
        raise ValueError("All index values must be unique.")

    logger.info("beginning segmenting {} files with mode={}".format(
        len(index), mode))
    utils.create_directory(output_directory)

    worker = delayed(segment_one)
    tasks = (worker(key, path, mode, output_directory)
             for key, path in zip(index, audio_files))
    return Parallel(n_jobs=num_cpus, verbose=verbose)(tasks)
def segment_many(index, audio_files, mode, output_directory,
                 num_cpus=-1, verbose=0):
    """Segment many audio files at once, fanning out across CPUs.

    Parameters
    ----------
    index : array_like
        Index values for the audio files; must be unique!
    audio_files : array_like
        Collection of audio filepaths.
    output_directory : str
        Path at which outputs will be written.
    num_cpus : int, default=-1
        Number of CPUs to use for parallel downloads; -1 for all.
    verbose : int, default=0
        Verbosity level for parallel computation.

    Returns
    -------
    output_paths : list
        Filepaths of generated output, or None for failures.
    """
    # Refuse duplicate index values before doing any work.
    if len(index) != len(set(index)):
        raise ValueError("All index values must be unique.")

    logger.info("beginning segmenting {} files with mode={}".format(
        len(index), mode))
    utils.create_directory(output_directory)

    runner = Parallel(n_jobs=num_cpus, verbose=verbose)
    segment_job = delayed(segment_one)
    job_list = [segment_job(item_id, audio_path, mode, output_directory)
                for item_id, audio_path in zip(index, audio_files)]
    return runner(job_list)
def audio_collection_to_observations(segment_index_file, note_index_file,
                                     note_audio_dir, limit_n_files=None,
                                     note_duration=None):
    """Segment every audio file listed in a segment index into note
    observations, writing note audio to disk and an index CSV of results.

    Parameters
    ----------
    segment_index_file : str
        Input file containing all pointers to audio files and onsets files.
    note_index_file : str
        Path to the output index file which will contain pointers to the
        output note audio, and the metadata relating to it.
    note_audio_dir : str
        Path to store the resulting audio file.
    limit_n_files : int, default=None
        If given, stop after segmenting this many source files.
    note_duration : float, default=None
        Forwarded to `audio_to_observations`; presumably a fixed note
        length — TODO confirm units against that function.

    Returns
    -------
    success : bool
        True if the method completed as expected.
    """
    logger.info("Begin audio collection segmentation")
    logger.debug("Loading segment index")
    segment_df = pd.read_csv(segment_index_file, index_col=0)
    logger.debug("loaded {} records.".format(len(segment_df)))
    if segment_df.empty:
        logger.warning(utils.colorize(
            "No data available in {}; exiting.".format(segment_index_file),
            color='red'))
        # Return success (rather than failing) so the makefile will
        # continue to build other datasets even when this one is empty.
        return True

    # Drop rows that do not have onsets_files.
    segment_df = segment_df.loc[segment_df.onsets_file.dropna().index]
    utils.create_directory(note_audio_dir)
    count = 0
    observations = []
    for idx, row in segment_df.iterrows():
        # Defensive re-check: the .loc filter above selects by index
        # label, so duplicate index labels could re-admit a null row —
        # skip it with a warning rather than crash downstream.
        if pd.isnull(row.onsets_file):
            logger.warning("No onset file for {} [{}]; moving on.".format(
                row.audio_file, row.dataset))
            continue
        observations += audio_to_observations(
            idx, row.audio_file, row.onsets_file, note_audio_dir,
            file_ext='flac', dataset=row.dataset, instrument=row.instrument,
            dynamic=row.dynamic, note_duration=note_duration)
        logger.debug("Generated {} observations ({} of {}).".format(
            len(observations), (count + 1), len(segment_df)))
        if PRINT_PROGRESS:
            # Carriage-return progress line, overwritten in place.
            print("Progress: {:0.1f}% ({} of {})\r".format(
                (((count + 1) / float(len(segment_df))) * 100.),
                (count + 1), len(segment_df)), end='')
            sys.stdout.flush()
        count += 1
        # `count` only advances for files that were actually segmented,
        # so the limit counts successful files, not rows visited.
        if limit_n_files and count >= limit_n_files:
            break
    if PRINT_PROGRESS:
        print()

    # Persist the aggregate note index for all generated observations.
    collection = model.Collection(observations)
    collection.to_dataframe().to_csv(note_index_file)
    logger.debug("Wrote note index to {} with {} records".format(
        note_index_file, len(collection)))
    logger.info("Completed audio collection segmentation")
    return os.path.exists(note_index_file)