def redraw_onset_data(self):
    """Clear any existing onset artists and redraw them on both axes."""
    logger.debug("redrawing onsets")
    if not self.has_onsets:
        logger.debug("Doesn't have any: {}".format(self.onset_data))
        return

    logger.debug("\nOnsets to draw: {}\nHandles: {}".format(
        self.onset_data, self.onset_handles))

    # Detach the previous artists before drawing the new set.
    for hnd in self.onset_handles:
        hnd.remove()

    logger.debug("Plotting the following onsets: {}".format(
        self.onset_data))
    self.onset_handles = []
    logger.debug("drawing lines : {}".format(self.onset_times))

    # Vertical onset markers and index labels over the waveform (axes[0])...
    self.onset_handles += [self.axes[0].vlines(
        self.onset_data.time, ymin=-1.05 * self.x_max,
        ymax=1.05 * self.x_max, color='k', alpha=0.5, linewidth=3)]
    for t, i in zip(self.onset_data.time, self.onset_data.index):
        self.onset_handles += [self.axes[0].text(
            x=t, y=self.x_max, s=i, va='top', ha='left', fontsize=16)]

    # ...and over the envelope (axes[1]).
    self.onset_handles += [self.axes[1].vlines(
        self.onset_data.time, ymin=self.envelope.min() * 1.05, ymax=0,
        color='k', alpha=0.5, linewidth=3)]
    for t, i in zip(self.onset_data.time, self.onset_data.index):
        self.onset_handles += [self.axes[1].text(
            x=t, y=-3, s=i, va='top', ha='left', fontsize=16)]
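
# The remove-and-redraw pattern above is standard matplotlib: the artists
# returned by vlines()/text() stay attached to their Axes until .remove() is
# called, so stale handles must be detached before new ones are drawn. A
# minimal standalone sketch of the same idea (hypothetical helper, not part
# of OnsetCanvas; relies on this module's matplotlib import):
def _example_redraw(ax, handles, times):
    for hnd in handles:
        hnd.remove()                 # detach the old artists from the Axes
    new_handles = [ax.vlines(times, ymin=-1, ymax=1, color='k', alpha=0.5)]
    ax.figure.canvas.draw_idle()     # schedule a repaint of the figure
    return new_handles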
def annotate_one(row, data_dir, skip_existing=False):
    """Create a canvas from the row information to annotate a single
    audio file.

    If a previous onset file exists, it is loaded first.

    Parameters
    ----------
    row : pd.Series
        Record with at least `audio_file`, `onsets_file`, and `instrument`
        fields.

    data_dir : str
        Directory where the output onset file is written when the row has
        no pre-existing one.

    skip_existing : bool
        If an edited file already exists, skip over it.

    Returns
    -------
    quit : bool
        True if the annotator asked to end the whole session.

    mark_for_later : bool
        True if this file was flagged for a second pass.
    """
    logger.info("Annotating:\n{} [idx={}]".format(row.audio_file, row.name))

    # Safely try to load the onsets_file.
    if pd.isnull(row.onsets_file) or not os.path.exists(row.onsets_file):
        onsets = pd.DataFrame([])
        output_file = os.path.join(data_dir, "{}-fix.csv".format(row.name))
        logger.debug("No pre-existing onsets file exists for {}".format(
            row.audio_file))
    else:
        onsets = pd.read_csv(row.onsets_file)
        output_file = row.onsets_file.replace(".csv", "-fix.csv")

    title = "{} | instrument: {}".format(row.name, row['instrument'])
    logger.info("Title: {}".format(title))

    if os.path.exists(output_file) and skip_existing:
        return False, False

    t0 = time.time()
    canvas = OnsetCanvas(row.audio_file, output_file, onsets, title=title)
    logger.info("Writing to: {}".format(output_file))
    plt.show(block=True)

    # Report timing, and tell the caller whether to continue.
    t_end = time.time() - t0
    logger.info("Took {}s to work on {}".format(t_end, row.audio_file))
    return canvas._quit, canvas._mark_for_later
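
# A minimal driver sketch for annotate_one(), assuming a segment index CSV
# whose rows carry the fields accessed above (audio_file, onsets_file,
# instrument); the function name and paths here are hypothetical.
def _example_annotate_all(segment_index_file, data_dir):
    segment_df = pd.read_csv(segment_index_file, index_col=0)
    for _, row in segment_df.iterrows():
        should_quit, marked = annotate_one(row, data_dir,
                                           skip_existing=True)
        if should_quit:
            # 'Q' in the canvas requests that the whole session stop.
            break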
def modify_onsets_at_time(self, x, threshold=0.5, create=True):
    """Toggle an onset at time `x`.

    Removes any onset occurring within `threshold` seconds of `x`; if no
    such onset exists and `create` is True, appends a new onset at `x`.

    Parameters
    ----------
    x : float
        Time in seconds.

    threshold : float
        Collision window around `x`, in seconds.

    create : bool
        If True, create a new onset when there is no collision.
    """
    od = None
    # Debugging checks
    if self.has_onsets and self.onset_times is None:
        logger.error("Onset Times is None!")
    if x is None:
        logger.error("modify_onsets_at_time() - x is None!")
    if threshold is None:
        logger.error("modify_onsets_at_time() - Threshold is None!")

    if (self.has_onsets and
            (np.abs(self.onset_times - x) < threshold).any()):
        # Collision! Remove the first onset inside the window.
        idx = (np.abs(self.onset_times - x) < threshold).nonzero()[0]
        logger.debug("Collision: {}".format(idx))
        od = self.onset_data.drop(
            pd.Index([self.onset_data.index[idx[0]]]))
    # If there's no onset at this location and create mode is on.
    elif create:
        logger.debug("New datapoint!")
        od = self.onset_data.append(dict(time=x), ignore_index=True)

    if od is not None:
        self.set_onset_data(od)
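
# A worked example of the collision test above, with hypothetical values
# (relies on this module's numpy import):
def _example_collision_mask():
    onset_times = np.array([0.5, 1.2, 3.0])
    x, threshold = 1.0, 0.5
    # abs(onset_times - x) = [0.5, 0.2, 2.0], so the mask below is
    # [False, True, False]: the onset at 1.2s collides and would be
    # dropped; with no collision (and create=True), a new onset at x
    # would be appended instead.
    mask = np.abs(onset_times - x) < threshold
    return mask.nonzero()[0]  # -> array([1])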
def on_key_press(self, event):
    """Handle key-press events.

    Catches the following:
        x : Write current onset data and close this canvas.
        w : Write current onset data.
        q : Close this canvas without saving.
        Q : Quit the whole annotation session.
        m : Mark this file for later and close the canvas.
        spacebar : Toggle a marker at the current mouse position.
        d / D : Delete markers within 1s / 5s of the mouse position.
        c : Clear all current onsets.
        1-4 : Re-estimate onsets with envelope_onsets (increasing wait).
        6, 7 : Re-estimate onsets with logcqt_onsets.
        0 : Re-estimate onsets with hll_onsets.
        arrows : Shift all markers by 10ms (left/right) or 100ms (up/down).
    """
    logger.debug('Received: {}'.format(event.key))
    sys.stdout.flush()
    if event.key == 'Q':
        logger.info("Quitting")
        plt.close()
        self._alive = False
        self._quit = True
    elif event.key == 'x':
        logger.info("Saving to: {}".format(self.output_file))
        self.save_onsets()
        plt.close()
        self._alive = False
    elif event.key == 'w':
        logger.info("Saving to: {}".format(self.output_file))
        self.save_onsets()
    elif event.key == 'q':
        logger.info("Closing")
        plt.close()
        self._alive = False
    elif event.key == 'c':
        logger.info("Clearing existing markers")
        self.clear_onsets()
    elif event.key == 'm':
        logger.info("Marking for later")
        self._mark_for_later = True
        plt.close()
        self._alive = False
    elif event.key == ' ':
        x, y = event.xdata, event.ydata
        logger.debug('({:4}, {:4})'.format(x, y))
        self.modify_onsets_at_time(x)
    elif event.key == 'd':
        # Delete in a larger range (1s).
        x, y = event.xdata, event.ydata
        logger.info('({:4}, {:4})'.format(x, y))
        self.modify_onsets_at_time(x, threshold=1.0, create=False)
    elif event.key == 'D':
        # Delete in an even larger range (5s).
        x, y = event.xdata, event.ydata
        logger.info('({:4}, {:4})'.format(x, y))
        self.modify_onsets_at_time(x, threshold=5.0, create=False)

    # Use onset detectors to get started.
    elif event.key == '1':
        # Reset onsets with "envelope_onsets", 8ms wait.
        logger.debug("Getting envelope_onsets(.008)")
        onsets = S.envelope_onsets(self.x, self.fs,
                                   wait=int(self.fs * .008))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '2':
        # Reset onsets with "envelope_onsets", 10ms wait.
        logger.debug("Getting envelope_onsets(.01)")
        onsets = S.envelope_onsets(self.x, self.fs,
                                   wait=int(self.fs * .01))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '3':
        # Reset onsets with "envelope_onsets", 20ms wait.
        logger.debug("Getting envelope_onsets(.02)")
        onsets = S.envelope_onsets(self.x, self.fs,
                                   wait=int(self.fs * .02))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '4':
        # Reset onsets with "envelope_onsets", 50ms wait.
        logger.debug("Getting envelope_onsets(.05)")
        onsets = S.envelope_onsets(self.x, self.fs,
                                   wait=int(self.fs * .05))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '6':
        # Reset onsets with "logcqt_onsets", 10ms wait.
        logger.debug("Getting logcqt_onsets()")
        onsets = S.logcqt_onsets(self.x, self.fs,
                                 wait=int(self.fs * .01))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '7':
        # Reset onsets with "logcqt_onsets", 20ms wait.
        logger.debug("Getting logcqt_onsets()")
        onsets = S.logcqt_onsets(self.x, self.fs,
                                 wait=int(self.fs * .02))
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == '0':
        # Reset onsets with "hll_onsets".
        logger.debug("Getting hll_onsets()")
        onsets = S.hll_onsets(self.audio_file)
        self.set_onset_data(pd.DataFrame(dict(time=onsets)))
    elif event.key == 'left':
        # Shift all markers earlier (subtract) by 10ms/.01s.
        self.shift_onsets(-.01)
    elif event.key == 'right':
        # Shift all markers later (add) by 10ms/.01s.
        self.shift_onsets(.01)
    elif event.key == 'up':
        # Shift all markers earlier (subtract) by 100ms/.1s.
        self.shift_onsets(-.1)
    elif event.key == 'down':
        # Shift all markers later (add) by 100ms/.1s.
        self.shift_onsets(.1)
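
# For these bindings to fire, the handler has to be registered with the
# matplotlib event loop; presumably OnsetCanvas.__init__ does something like
# the sketch below (the `fig` attribute name is an assumption):
def _example_connect_keys(canvas):
    # mpl_connect returns a connection id, which can later be passed to
    # fig.canvas.mpl_disconnect() to detach the handler.
    return canvas.fig.canvas.mpl_connect('key_press_event',
                                         canvas.on_key_press)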
def audio_to_observations(index, audio_file, onsets_file, note_audio_dir,
                          file_ext='flac', note_duration=None, **meta):
    """Segment an audio file given an onset file, writing outputs to disk.

    Parameters
    ----------
    index : str
        Index of the source audio file; used to seed the note ids.

    audio_file : str
        Source audio file.

    onsets_file : str
        Path to a CSV file of cut points.

    note_audio_dir : str
        Path at which to write outputs.

    file_ext : str
        Desired output audio format for note files.

    note_duration : float, or default=None
        Desired duration of the output note files; if None, no fixed
        duration is applied.

    **meta : keyword args
        Additional record data to pass on to each observation; see
        model.Observation for more detail.

    Returns
    -------
    observations : list of model.Observation
        One observation per extracted note. The corresponding audio files
        are written to:
            {note_audio_dir}/{clip_index}.{file_ext}
    """
    # Get the soxi information on this file for its total duration.
    max_length = float(claudio.sox.soxi(audio_file, 'D'))

    # Load the onset file.
    onsets = pd.read_csv(onsets_file, index_col=0)
    if onsets.empty:
        logger.warning(
            "Onset File is empty! We can't extract notes without "
            "onsets, so skipping: {}".format(os.path.basename(onsets_file)))
        return []

    # Append the total duration to the end of the onsets so we can
    # iterate over them in pairs.
    onsets.loc[onsets.size] = max_length
    # Make sure it's sorted by time now.
    # TODO: Do we really want to drop the index here?
    onsets = onsets.sort_values('time').reset_index(drop=True)

    logger.debug("Attempting to generate {} observations".format(
        len(onsets)))
    observations = []
    # For each pair of adjacent onsets, clamped to [0, max_length]...
    for i in range(len(onsets) - 1):
        start_time = max(onsets.iloc[i]['time'], 0.0)
        end_time = min(onsets.iloc[i + 1]['time'], max_length)
        clip_index = utils.generate_id(
            index, "{}".format(start_time), hash_len=6)
        rel_output_file = "{}.{}".format(clip_index, file_ext.strip('.'))
        output_file = os.path.join(note_audio_dir, rel_output_file)
        if signal.extract_clip(audio_file, output_file, start_time,
                               end_time, note_duration):
            obs = model.Observation(
                index=clip_index, audio_file=rel_output_file,
                source_index=index, start_time=start_time,
                duration=end_time - start_time, **meta)
            observations.append(obs)
            logger.debug("New Observation: {}".format(obs.to_builtin()))

    return observations
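
# A usage sketch for audio_to_observations(), with hypothetical paths and
# index values; the extra keyword args become fields on each Observation
# via **meta:
def _example_segment_one():
    obs = audio_to_observations(
        "uiowa_001", "audio/uiowa_001.flac", "onsets/uiowa_001-fix.csv",
        "notes/", file_ext='flac', dataset="uiowa", instrument="trumpet")
    # Each observation records where its note audio landed on disk.
    return [o.audio_file for o in obs]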
def audio_collection_to_observations(segment_index_file, note_index_file,
                                     note_audio_dir, limit_n_files=None,
                                     note_duration=None):
    """Segment every audio file in a collection into individual notes.

    Parameters
    ----------
    segment_index_file : str
        Input file containing all pointers to audio files and onsets files.

    note_index_file : str
        Path to the output index file, which will contain pointers to the
        output note audio and the metadata relating to it.

    note_audio_dir : str
        Path at which to store the resulting audio files.

    limit_n_files : int, or default=None
        If given, stop after processing this many source files.

    note_duration : float, or default=None
        Fixed note duration, passed through to audio_to_observations.

    Returns
    -------
    success : bool
        True if the method completed as expected.
    """
    logger.info("Begin audio collection segmentation")
    logger.debug("Loading segment index")
    segment_df = pd.read_csv(segment_index_file, index_col=0)
    logger.debug("loaded {} records.".format(len(segment_df)))
    if segment_df.empty:
        logger.warning(utils.colorize(
            "No data available in {}; exiting.".format(segment_index_file),
            color='red'))
        # Return True here so the makefile will continue to build other
        # datasets, even if this one is empty.
        return True

    # Drop rows that do not have onsets_files.
    segment_df = segment_df.loc[segment_df.onsets_file.dropna().index]

    utils.create_directory(note_audio_dir)
    count = 0
    observations = []
    for idx, row in segment_df.iterrows():
        if pd.isnull(row.onsets_file):
            logger.warning("No onset file for {} [{}]; moving on.".format(
                row.audio_file, row.dataset))
            continue
        observations += audio_to_observations(
            idx, row.audio_file, row.onsets_file, note_audio_dir,
            file_ext='flac', dataset=row.dataset, instrument=row.instrument,
            dynamic=row.dynamic, note_duration=note_duration)
        logger.debug("Generated {} observations ({} of {}).".format(
            len(observations), (count + 1), len(segment_df)))
        if PRINT_PROGRESS:
            print("Progress: {:0.1f}% ({} of {})\r".format(
                (((count + 1) / float(len(segment_df))) * 100.),
                (count + 1), len(segment_df)), end='')
            sys.stdout.flush()
        count += 1
        if limit_n_files and count >= limit_n_files:
            break

    if PRINT_PROGRESS:
        print()

    collection = model.Collection(observations)
    collection.to_dataframe().to_csv(note_index_file)
    logger.debug("Wrote note index to {} with {} records".format(
        note_index_file, len(collection)))

    logger.info("Completed audio collection segmentation")
    return os.path.exists(note_index_file)
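
# Based on the row fields accessed above, the segment index needs at least
# these columns: audio_file, onsets_file, dataset, instrument, dynamic. A
# synthetic single-row index for smoke-testing (hypothetical values):
def _example_segment_index(csv_path="segment_index.csv"):
    df = pd.DataFrame([dict(audio_file="audio/a.flac",
                            onsets_file="onsets/a-fix.csv",
                            dataset="uiowa", instrument="trumpet",
                            dynamic="mf")])
    df.to_csv(csv_path)
    return csv_path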
    for test_set in result_df.dataset.unique():
        partition_index = os.path.join(destination_dir,
                                       partition_index_fmt.format(test_set))
        success &= train_test_split(output_file, test_set, train_val_split,
                                    partition_index)

    return os.path.exists(output_file) and success


if __name__ == "__main__":
    arguments = docopt(__doc__)
    level = 'INFO' if not arguments.get('--verbose') else 'DEBUG'
    logging.config.dictConfig(minst.logger.get_config(level))
    logger.debug(arguments)
    t0 = time.time()

    if arguments['join']:
        join_note_files(arguments['<sources>'], arguments['--output'])
    elif arguments['split']:
        train_test_split(
            arguments['<source_index>'][0],
            # The [0] above is required because source_index is parsed as
            # a list for the 'example' command.
            arguments['<test_set>'],
            float(arguments['<train_val_split>']),
            arguments['<output>'])
    elif arguments['example']:
        create_example_dataset(arguments['<destination_dir>'],
                               arguments['<source_index>'],