Example #1
    def redraw_onset_data(self):
        logger.debug("redrawing onsets")
        if not self.has_onsets:
            logger.debug("Doesn't have any: {}".format(self.onset_data))
            return

        logger.debug("\nOnsets to draw: {}\nHandles: {}".format(
            self.onset_data, self.onset_handles))

        for hnd in self.onset_handles:
            hnd.remove()

        logger.debug("Plotting the following onsets: {}".format(
            self.onset_data))

        self.onset_handles = []
        logger.debug("drawing lines : {}".format(self.onset_times))
        self.onset_handles += [self.axes[0].vlines(
            self.onset_data.time, ymin=-1.05 * self.x_max,
            ymax=1.05 * self.x_max, color='k', alpha=0.5, linewidth=3)]
        for t, i in zip(self.onset_data.time, self.onset_data.index):
            self.onset_handles += [self.axes[0].text(
                x=t, y=self.x_max, s=i, va='top', ha='left', fontsize=16)]

        self.onset_handles += [self.axes[1].vlines(
            self.onset_data.time, ymin=self.envelope.min() * 1.05,
            ymax=0, color='k', alpha=0.5, linewidth=3)]
        for t, i in zip(self.onset_data.time, self.onset_data.index):
            self.onset_handles += [self.axes[1].text(
                x=t, y=-3, s=i, va='top', ha='left', fontsize=16)]
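
# A minimal standalone sketch of the marker-redraw pattern used above: the
# artists returned by Axes.vlines() and Axes.text() expose .remove(), so old
# handles are removed before fresh ones are appended. The axes and values
# below are illustrative stand-ins, not part of the class.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
handles = []
handles.append(ax.vlines([0.5, 1.2], ymin=-1, ymax=1,
                         color='k', alpha=0.5, linewidth=3))
handles.append(ax.text(x=0.5, y=1.0, s='0', va='top', ha='left'))
# Redrawing: remove the previous artists, then plot new ones.
for hnd in handles:
    hnd.remove()
handles = []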
Example #2
def annotate_one(row, data_dir, skip_existing=False):
    """Create a canvas from the row information to annotate
    a single audio_file. If a previous onset file exists, loads it first.

    Parameters
    ----------
    row : pd.Series

    data_dir : str
        Path to where to put index files if none given.

    skip_existing : bool
        If edited files exist, skip over them.
    """
    # audio_file, onset_file, output_file=None, title=None,
    logger.info("Annotating:\n{} [idx={}]".format(row.audio_file,
                                                  row.name))

    # Safely try to load the onset_file
    if pd.isnull(row.onsets_file) or not os.path.exists(row.onsets_file):
        onsets = pd.DataFrame([])
        output_file = os.path.join(data_dir, "{}-fix.csv".format(row.name))
        logger.debug("No pre-existing onsets file exists for {}".format(
            row.audio_file))
    else:
        onsets = pd.read_csv(row.onsets_file)
        output_file = row.onsets_file.replace(".csv", "-fix.csv")

    title = "{}| instrument: {}".format(row.name, row['instrument'])
    logger.info("Title: {}".format(title))

    if os.path.exists(output_file) and skip_existing:
        return False, False
    t0 = time.time()

    canvas = OnsetCanvas(row.audio_file, output_file, onsets,
                         title=title)
    logger.info("Writing to: {}".format(output_file))
    plt.show(block=True)

    # Should we continue?
    t_end = time.time() - t0
    logger.info("Took {}s to work on {}".format(t_end, row.audio_file))
    return canvas.quit, canvas._mark_for_later
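
# Hedged usage sketch for annotate_one(): iterate over a segment index and
# stop when the annotator asks to quit. The CSV path and the column layout
# here are assumptions for illustration, not taken from the project.
import pandas as pd

segment_df = pd.read_csv("segment_index.csv", index_col=0)
for _, row in segment_df.iterrows():
    should_quit, marked_for_later = annotate_one(
        row, data_dir="annotations", skip_existing=True)
    if should_quit:
        break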
Example #3
    def modify_onsets_at_time(self, x, threshold=0.5, create=True):
        """Delete any offsets that occur at time +/- a threshold.

        Returns True if any are deleted, False otherwise.

        Parameters
        ----------
        x : float
            Time in seconds

        threshold : float
            In seconds.
        """
        od = None
        # Debugging checks
        if self.has_onsets:
            if self.onset_times is None:
                logger.error("Onset Times is None!")
        if x is None:
            logger.error("modify_onsets_at_time() - x is None!")
        if threshold is None:
            logger.error("modify_onsets_at_time() - Threshold is None!")

        if (self.has_onsets and
                (np.abs(self.onset_times - x) < threshold).any()):
            # Collision! Remove it
            idx = (np.abs(self.onset_times - x) < threshold).nonzero()[0]
            logger.debug("Collision: {}".format(idx))
            od = self.onset_data.drop(
                pd.Index([self.onset_data.index[idx[0]]]))
        # If there's no onset at this location and create mode is on.
        elif create:
            logger.debug("New datapoint!")
            od = self.onset_data.append(dict(time=x), ignore_index=True)
        if od is not None:
            self.set_onset_data(od)
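
# Standalone sketch of the same toggle logic on a bare DataFrame; the names
# and the pd.concat call are illustrative, while the method above works on
# the canvas state via DataFrame.drop / DataFrame.append.
import numpy as np
import pandas as pd

def toggle_onset(onset_data, x, threshold=0.5, create=True):
    hits = np.abs(onset_data.time.values - x) < threshold
    if hits.any():
        # Collision: drop the first onset inside the threshold window.
        return onset_data.drop(onset_data.index[hits.nonzero()[0][0]])
    elif create:
        # No collision: add a new onset at time x.
        return pd.concat([onset_data, pd.DataFrame([{"time": x}])],
                         ignore_index=True)
    return onset_data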
Example #4
    def on_key_press(self, event):
        """Handle key-press events.

        Catches the following:
            x : Write current onset data and close this canvas.
            w : Write current onset data.
            q : Close this canvas without saving.
            spacebar : Toggle a marker at the current mouse position.
            c : Clear all current onsets
        """
        logger.debug('Received: {}'.format(event.key))
        sys.stdout.flush()
        if event.key == 'Q':
            logger.info("Quitting")
            plt.close()
            self._alive = False
            self._quit = True

        elif event.key == 'x':
            logger.info("Saving to: {}".format(self.output_file))
            self.save_onsets()
            plt.close()
            self._alive = False

        elif event.key == 'w':
            logger.info("Saving to: {}".format(self.output_file))
            self.save_onsets()

        elif event.key == 'q':
            logger.info("Closing")
            plt.close()
            self._alive = False

        elif event.key == 'c':
            logger.info("Clearing existing markers")
            self.clear_onsets()

        elif event.key == 'm':
            logger.info("Marking for later")
            self._mark_for_later = True
            plt.close()
            self._alive = False

        elif event.key == ' ':
            x, y = event.xdata, event.ydata
            logger.debug('({:4}, {:4})'.format(x, y))
            self.modify_onsets_at_time(x)

        elif event.key == 'd':
            # Delete in a larger range (1s)
            x, y = event.xdata, event.ydata
            logger.info('({:4}, {:4})'.format(x, y))
            self.modify_onsets_at_time(x, threshold=1.0, create=False)

        elif event.key == 'D':
            # Delete in an even larger range (5s)
            x, y = event.xdata, event.ydata
            logger.info('({:4}, {:4})'.format(x, y))
            self.modify_onsets_at_time(x, threshold=5.0, create=False)

        # Use onset detectors to get started
        elif event.key == '1':
            logger.debug("Getting envelope_onsets(.008)")
            onsets = S.envelope_onsets(self.x, self.fs,
                                       wait=int(self.fs * .008))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '2':
            # Reset onsets with "envelope_onsets"
            logger.debug("Getting envelope_onsets(.01)")
            onsets = S.envelope_onsets(self.x, self.fs,
                                       wait=int(self.fs * .01))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '3':
            # Reset onsets with "envelope_onsets"
            logger.debug("Getting envelope_onsets(.02)")
            onsets = S.envelope_onsets(self.x, self.fs,
                                       wait=int(self.fs * .02))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '4':
            # Reset onsets with "envelope_onsets"
            logger.debug("Getting envelope_onsets(.05)")
            onsets = S.envelope_onsets(self.x, self.fs,
                                       wait=int(self.fs * .05))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '6':
            # Reset onsets with "logcqt_onsets"
            logger.debug("Getting logcqt_onsets()")
            onsets = S.logcqt_onsets(self.x, self.fs,
                                     wait=int(self.fs * .01))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '7':
            # Reset onsets with "logcqt_onsets"
            logger.debug("Getting logcqt_onsets()")
            onsets = S.logcqt_onsets(self.x, self.fs,
                                     wait=int(self.fs * .02))
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == '0':
            # Reset onsets with "hll_onsets"
            logger.debug("Getting hll_onsets()")
            onsets = S.hll_onsets(self.audio_file)
            self.set_onset_data(pd.DataFrame(dict(time=onsets)))

        elif event.key == 'left':
            # Shift all markers to the left (subtract) by 10ms/.01s
            self.shift_onsets(-.01)

        elif event.key == 'right':
            # Shift all markers to the right (add) by 10ms/.01s
            self.shift_onsets(.01)

        elif event.key == 'up':
            # Shift all markers to the left (subtract) by 100ms/.1s
            self.shift_onsets(-.1)

        elif event.key == 'down':
            # Shift all markers to the right (add) by 100ms/.1s
            self.shift_onsets(.1)
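
# Hedged sketch of how a handler like on_key_press is typically wired up in
# matplotlib; the figure and callback below are stand-ins, not OnsetCanvas.
import matplotlib.pyplot as plt

def _demo_on_key(event):
    # event.key carries the key name ('x', 'q', ' ', 'left', ...), while
    # event.xdata / event.ydata give the mouse position in data coordinates.
    print(event.key, event.xdata, event.ydata)

fig, ax = plt.subplots()
fig.canvas.mpl_connect('key_press_event', _demo_on_key)
# plt.show(block=True) would then run the event loop, as annotate_one does.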
def audio_to_observations(index, audio_file, onsets_file, note_audio_dir,
                          file_ext='flac', note_duration=None, **meta):
    """Segment an audio file given an onset file, writing outputs to disk.

    Parameters
    ----------
    index : str
        Identifier for the source audio file; used to seed the clip ids.

    audio_file : str
        Source audio file.

    onsets_file : str
        Path to a CSV file of cut points.

    note_audio_dir : str
        Path at which to write outputs.

    file_ext : str
        Desired output audio format for note files.

    note_duration : float, default=None
        Desired duration of the output note files; if None, no fixed duration
        is applied.

    **meta : keyword args
        Additional record data to pass on to each observation; see
        model.Observation for more detail.

    Returns
    -------
    observations : list of model.Observation
        Observation records for the generated note clips. Each clip's audio
        is written to disk as: {note_audio_dir}/{clip_index}.{file_ext}
    """

    # Get the soxi information on this file to get the Duration
    max_length = float(claudio.sox.soxi(audio_file, 'D'))

    # load the onset file.
    onsets = pd.read_csv(onsets_file, index_col=0)
    if onsets.empty:
        logger.warning(
            "Onset File is empty! We can't extract notes without "
            "onsets, so skipping: {}".format(os.path.basename(onsets_file)))
        return []

    # Append the track duration to the end of the onsets so we can
    # iterate over them in pairs.
    onsets.loc[onsets.size] = max_length

    # Make sure it's sorted by time now.
    # TODO: Do we really want to drop the index here?
    onsets = onsets.sort_values('time').reset_index(drop=True)
    logger.debug("Attempting to generate {} observations".format(len(onsets)))
    observations = []
    # for each pair of onsets
    for i in range(len(onsets) - 1):
        start_time = onsets.iloc[i]['time']
        if start_time < 0.0:
            start_time = 0.0
        end_time = onsets.iloc[i + 1]['time']
        if end_time > max_length:
            end_time = max_length

        clip_index = utils.generate_id(
            index, "{}".format(start_time), hash_len=6)

        rel_output_file = "{}.{}".format(clip_index, file_ext.strip('.'))
        output_file = os.path.join(note_audio_dir, rel_output_file)

        if signal.extract_clip(audio_file, output_file, start_time,
                               end_time, note_duration):
            obs = model.Observation(
                index=clip_index, audio_file=rel_output_file,
                source_index=index, start_time=start_time,
                duration=end_time - start_time, **meta)
            observations.append(obs)
            logger.debug("New Observation: {}".format(obs.to_builtin()))

    return observations
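
# Minimal sketch of the pairwise segmentation idea above, independent of the
# claudio/signal/model helpers; all values below are illustrative.
onset_times = [0.12, 1.05, 2.30]
max_length = 3.5  # track duration, as soxi would report it
boundaries = sorted(onset_times) + [max_length]
clips = [(max(start, 0.0), min(end, max_length))
         for start, end in zip(boundaries[:-1], boundaries[1:])]
# -> [(0.12, 1.05), (1.05, 2.3), (2.3, 3.5)]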
def audio_collection_to_observations(segment_index_file, note_index_file,
                                     note_audio_dir, limit_n_files=None,
                                     note_duration=None):
    """
    Parameters
    ----------
    segment_index_file : str
        Input file containing all pointers to audio files and
        onsets files.

    note_index_file: str
        Path to the output index file which will contain pointers
        to the output note audio, and the metadata relating to it.

    note_audio_dir : str
        Path to store the resulting audio file.

    Returns
    -------
    success : bool
        True if the method completed as expected.
    """
    logger.info("Begin audio collection segmentation")
    logger.debug("Loading segment index")
    segment_df = pd.read_csv(segment_index_file, index_col=0)
    logger.debug("loaded {} records.".format(len(segment_df)))

    if segment_df.empty:
        logger.warning(utils.colorize(
            "No data available in {}; exiting.".format(segment_index_file),
            color='red'))
        # Return True here so the Makefile will continue to build
        # other datasets, even if this one has no data.
        return True

    # Drop rows that do not have onsets_files.
    segment_df = segment_df.loc[segment_df.onsets_file.dropna().index]
    utils.create_directory(note_audio_dir)
    count = 0
    observations = []
    for idx, row in segment_df.iterrows():
        if pd.isnull(row.onsets_file):
            logger.warning("No onset file for {} [{}]; moving on.".format(
                row.audio_file, row.dataset))
            continue
        observations += audio_to_observations(
            idx, row.audio_file, row.onsets_file, note_audio_dir,
            file_ext='flac', dataset=row.dataset, instrument=row.instrument,
            dynamic=row.dynamic, note_duration=note_duration)
        logger.debug("Generated {} observations ({} of {}).".format(
            len(observations), (count + 1), len(segment_df)))

        if PRINT_PROGRESS:
            print("Progress: {:0.1f}% ({} of {})\r".format(
                (((count + 1) / float(len(segment_df))) * 100.),
                (count + 1), len(segment_df)), end='')
            sys.stdout.flush()
        count += 1

        if limit_n_files and count >= limit_n_files:
            break

    if PRINT_PROGRESS:
        print()

    collection = model.Collection(observations)
    collection.to_dataframe().to_csv(note_index_file)
    logger.debug("Wrote note index to {} with {} records".format(
        note_index_file, len(collection)))
    logger.info("Completed audio collection segmentation")
    return os.path.exists(note_index_file)
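
# Hypothetical invocation of audio_collection_to_observations(); the paths
# and the file limit are placeholders, not taken from the project's Makefile.
success = audio_collection_to_observations(
    segment_index_file="segment_index.csv",
    note_index_file="note_index.csv",
    note_audio_dir="note_audio",
    limit_n_files=10)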
    for test_set in result_df.dataset.unique():
        partition_index = os.path.join(destination_dir,
                                       partition_index_fmt.format(test_set))
        success &= train_test_split(output_file, test_set, train_val_split,
                                    partition_index)
    return os.path.exists(output_file) and success


if __name__ == "__main__":
    arguments = docopt(__doc__)

    level = 'INFO' if not arguments.get('--verbose') else 'DEBUG'
    logging.config.dictConfig(minst.logger.get_config(level))

    logger.debug(arguments)

    t0 = time.time()
    if arguments['join']:
        join_note_files(arguments['<sources>'], arguments['--output'])
    elif arguments['split']:
        train_test_split(arguments['<source_index>'][0],
                         # the above requires the [0] because we use
                         # source_index as a list for examples...
                         arguments['<test_set>'],
                         float(arguments['<train_val_split>']),
                         arguments['<output>'])
    elif arguments['example']:
        create_example_dataset(
            arguments['<destination_dir>'],
            arguments['<source_index>'],