def draw_onset_data(audio_file, onset_data, title):
    """Plot a waveform and its log-envelope, with onsets marked.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by `claudio.read`.
    onset_data : pd.DataFrame
        Onset records; must expose a `time` column (seconds) when non-empty.
    title : str
        Title for the top (waveform) axes.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure with two stacked axes: waveform on top, log-envelope below.
    """
    signal, samplerate = claudio.read(audio_file, samplerate=22050,
                                      channels=1, bytedepth=2)
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))

    hop = 100
    peak = np.abs(signal).max()
    # Time axis for the hop-decimated views of the signal.
    times = np.arange(0, len(signal), hop) / float(samplerate)

    # Top panel: decimated waveform with full-height onset markers.
    axes[0].plot(times, signal.flatten()[::hop])
    if not onset_data.empty:
        axes[0].vlines(onset_data.time, ymin=-1.05 * peak, ymax=1.05 * peak,
                       color='k', alpha=0.5, linewidth=3)

    # Bottom panel: low-passed log envelope, markers from its min up to 0 dB.
    log_env_lpf = S.log_envelope(signal, samplerate, 100)
    axes[1].plot(times, log_env_lpf[::hop])
    if not onset_data.empty:
        axes[1].vlines(onset_data.time, ymin=log_env_lpf.min() * 1.05,
                       ymax=0, color='k', alpha=0.5, linewidth=3)

    for ax in axes:
        ax.set_xlim(0, times.max())
        ax.set_xlabel("Time (sec)")

    axes[0].set_title(title)
    return fig
def __init__(self, audio_file, output_file, onset_data=None, nhop=100,
             group=None, target=None, title=None, verbose=None):
    """Build the interactive two-panel (waveform + envelope) onset view.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by `claudio.read`.
    output_file : str
        Path that onset data will be written to.
    onset_data : pd.DataFrame, default=None
        Initial onset records; an empty frame is used when None.
    nhop : int, default=100
        Decimation hop for the plotted waveform / envelope.
    group, target, verbose :
        Accepted for interface compatibility; unused here.
    title : str, default=None
        Title prefix shown above the key-binding help line.
    """
    self.fig, self.axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 6))
    x, fs = claudio.read(audio_file, samplerate=22050, channels=1,
                         bytedepth=2)
    # None (not a mutable default) means "no onsets yet".
    onset_data = pd.DataFrame([]) if onset_data is None else onset_data
    self.output_file = output_file
    self.x_max = np.abs(x).max()
    self.trange = np.arange(0, len(x), nhop) / float(fs)
    self.waveform = x.flatten()[::nhop]
    self.envelope = S.log_envelope(x, fs, nhop)[::nhop]
    self.wave_handle = self.axes[0].plot(self.trange, self.waveform)
    self.env_handle = self.axes[1].plot(self.trange, self.envelope)
    self.onset_handles = []
    self.refresh_xlim()
    title = '' if not title else title
    # BUG FIX: the original assigned the template string literally,
    # discarding the caller's title; format it in (as the 3-panel
    # sibling __init__ does).
    title = "{}\nx: write and close / w: write / q: close".format(title)
    self.axes[0].set_title(title)
    self.set_onset_data(onset_data)
    self.fig.canvas.mpl_connect('key_press_event', self.on_key_press)
    self._alive = False
def test_harmonic_cqt_uneven_length(workspace):
    """harmonic_cqt on the uneven-length fixture yields non-empty 4-D output."""
    input_file = os.path.join(DIRNAME, "uneven_hcqt.flac")
    signal, rate = claudio.read(input_file, samplerate=22050, channels=1)
    spec = CQT.harmonic_cqt(signal, rate, n_harmonics=6, n_bins=144,
                            bins_per_octave=24)
    # Expect (channels, harmonics, frames, bins)-like rank-4 output
    # carrying actual energy.
    assert spec.ndim == 4
    assert np.abs(spec).sum() > 0
def segment(audio_file, mode, db_delta_thresh=2.5, **kwargs):
    """Detect onsets in an audio file and keep those with enough energy rise.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by `claudio.read`.
    mode : str
        Onset detector name; 'hll' dispatches to `hll_onsets`, anything
        else is looked up in the `ONSETS` registry.
    db_delta_thresh : float, default=2.5
        Minimum (window max - global mean) log-envelope delta, in dB,
        for an onset to be kept.
    **kwargs
        Extra arguments forwarded to the selected onset detector.

    Returns
    -------
    pd.DataFrame
        One record per retained onset: time, env_max, env_mean, env_std,
        env_delta.
    """
    x, fs = claudio.read(audio_file, samplerate=22050, channels=1,
                         bytedepth=2)
    if mode == 'hll':
        onset_times = hll_onsets(audio_file)
    else:
        onset_times = ONSETS.get(mode)(x, fs, **kwargs)

    # FIX: `sr` is keyword-only in librosa >= 0.10; pass it by name.
    onset_idx = librosa.time_to_samples(onset_times, sr=fs)
    log_env_lpf = log_envelope(x, fs, 100)

    recs = []
    for time, idx in zip(onset_times, onset_idx):
        # Statistics over the one-second window following each onset.
        x_m = log_env_lpf[idx: idx + int(fs)]
        if not len(x_m):
            # Onset at/past the end of the signal -- `x_m.max()` would
            # raise on an empty slice, so skip it.
            continue
        rec = dict(time=time, env_max=x_m.max(), env_mean=x_m.mean(),
                   env_std=x_m.std(),
                   env_delta=x_m.max() - log_env_lpf.mean())
        if rec['env_delta'] > db_delta_thresh:
            recs += [rec]

    return pd.DataFrame.from_records(recs)
def __init__(self, audio_file, output_file, onset_data=None, nhop=100,
             group=None, target=None, title=None, verbose=None):
    """Build the interactive three-panel (waveform / envelope / log-CQT) view.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by `claudio.read`.
    output_file : str
        Path that onset data will be written to.
    onset_data : pd.DataFrame, default=None
        Initial onset records; an empty frame is used when None.
    nhop : int, default=100
        Decimation hop for the plotted waveform / envelope.
    group, target, verbose :
        Accepted for interface compatibility; unused here.
    title : str, default=None
        Title prefix shown above the key-binding help lines.
    """
    self.fig, self.axes = plt.subplots(nrows=3, ncols=1, figsize=(20, 6))
    self.audio_file = audio_file
    self.output_file = output_file
    self.x, self.fs = claudio.read(audio_file, samplerate=22050, channels=1,
                                   bytedepth=2)
    if onset_data is None:
        onset_data = pd.DataFrame([])

    # Derived, hop-decimated views of the signal for plotting.
    self.x_max = np.abs(self.x).max()
    self.trange = np.arange(0, len(self.x), nhop) / float(self.fs)
    self.waveform = self.x.flatten()[::nhop]
    self.envelope = S.log_envelope(self.x, self.fs, nhop)[::nhop]
    self.lcqt = S.logcqt(self.x, self.fs)

    self.wave_handle = self.axes[0].plot(self.trange, self.waveform)
    self.env_handle = self.axes[1].plot(self.trange, self.envelope)
    self.lcqt_handle = self.axes[2].imshow(self.lcqt, aspect='auto',
                                           origin='lower',
                                           interpolation='nearest')
    self.onset_handles = []
    self.refresh_xlim()

    title = '' if not title else title
    title = ("{}\nx: write and close / w: write / q: close / c: clear / "
             "SPACE: set/delete marker\n"
             "1: envelope onsets / 2: logcqt onsets".format(title))
    self.axes[0].set_title(title)

    self.set_onset_data(onset_data)
    self.fig.canvas.mpl_connect('key_press_event', self.on_key_press)
    self._alive = False
    self._quit = False
    self._mark_for_later = False
def segment(audio_file, mode, db_delta_thresh=2.5, **kwargs):
    """Detect onsets in an audio file and keep those with enough energy rise.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by `claudio.read`.
    mode : str
        Onset detector name; 'hll' dispatches to `hll_onsets`, anything
        else is looked up in the `ONSETS` registry.
    db_delta_thresh : float, default=2.5
        Minimum (window max - global mean) log-envelope delta, in dB,
        for an onset to be kept.
    **kwargs
        Extra arguments forwarded to the selected onset detector.

    Returns
    -------
    pd.DataFrame
        One record per retained onset: time, env_max, env_mean, env_std,
        env_delta.
    """
    x, fs = claudio.read(audio_file, samplerate=22050, channels=1,
                         bytedepth=2)
    if mode == 'hll':
        onset_times = hll_onsets(audio_file)
    else:
        onset_times = ONSETS.get(mode)(x, fs, **kwargs)

    # FIX: `sr` is keyword-only in librosa >= 0.10; pass it by name.
    onset_idx = librosa.time_to_samples(onset_times, sr=fs)
    log_env_lpf = log_envelope(x, fs, 100)

    recs = []
    for time, idx in zip(onset_times, onset_idx):
        # Statistics over the one-second window following each onset.
        x_m = log_env_lpf[idx:idx + int(fs)]
        if not len(x_m):
            # Onset at/past the end of the signal -- `x_m.max()` would
            # raise on an empty slice, so skip it.
            continue
        rec = dict(time=time, env_max=x_m.max(), env_mean=x_m.mean(),
                   env_std=x_m.std(),
                   env_delta=x_m.max() - log_env_lpf.mean())
        if rec['env_delta'] > db_delta_thresh:
            recs += [rec]

    return pd.DataFrame.from_records(recs)
def cqt_one(input_file, output_file, cqt_params=None, audio_params=None,
            harmonic_params=None, skip_existing=True):
    """Compute the CQT for a input/output file Pair.

    Parameters
    ----------
    input_file : str
        Audio file to apply the CQT

    output_file : str
        Path to write the output.

    cqt_params : dict, default=None
        Parameters for the CQT function. See `librosa.cqt`.

    audio_params : dict, default=None
        Parameters for reading the audio file. See `claudio.read`.

    harmonic_params : dict, default=None
        Parameters for the `harmonic_cqt` function, which will update those
        in cqt_params.

    skip_existing : bool, default=True
        Skip outputs that exist.

    Returns
    -------
    success : bool
        True if the output file was successfully created.
    """
    input_exists, output_exists = [os.path.exists(f)
                                   for f in (input_file, output_file)]
    if not input_exists:
        logger.warning("[{0}] Input file doesn't exist, skipping: {1}"
                       "".format(time.asctime(), input_file))
        return input_exists

    if skip_existing and output_exists:
        logger.info("[{0}] Output file exists, skipping: {1}"
                    "".format(time.asctime(), output_file))
        return output_exists

    logger.debug("[{0}] Starting {1}".format(time.asctime(), input_file))
    if not cqt_params:
        cqt_params = CQT_PARAMS.copy()

    if not audio_params:
        audio_params = AUDIO_PARAMS.copy()

    if not harmonic_params:
        harmonic_params = HARMONIC_PARAMS.copy()

    logger.debug("[{0}] Audio conversion {1}".format(
        time.asctime(), input_file))
    try:
        x, fs = claudio.read(input_file, **audio_params)
        if len(x) <= 0:
            logger.error("Bad Input signal length={} for audio {}".format(
                len(x), input_file))
            return False

        logger.debug("[{0}] Computing features {1}".format(
            time.asctime(), input_file))
        # Per-channel magnitude CQT, frames on axis 1.
        cqt_spectra = np.array(
            [np.abs(librosa.cqt(x_c, sr=fs, **cqt_params).T)
             for x_c in x.T])

        # FIX: merge into a fresh dict instead of `cqt_params.update(...)`,
        # which mutated a caller-supplied dict as a side effect.
        harm_params = dict(cqt_params, **harmonic_params)
        harm_spectra = harmonic_cqt(x, fs, **harm_params)

        frame_idx = np.arange(cqt_spectra.shape[1])
        time_points = librosa.frames_to_time(
            frame_idx, sr=fs, hop_length=harm_params['hop_length'])
        logger.debug("[{0}] Saving: {1}".format(time.asctime(), output_file))
        np.savez(
            output_file, time_points=time_points,
            cqt=np.abs(cqt_spectra).astype(np.float32),
            harmonic_cqt=np.abs(harm_spectra).astype(np.float32))
    except AssertionError as e:
        # claudio signals unreadable audio via assertions; log and fall
        # through so the existence check below reports the failure.
        logger.error("Failed to load audio file: {} with error:\n{}".format(
            input_file, e))
    logger.debug("[{0}] Finished: {1}".format(time.asctime(), output_file))
    return os.path.exists(output_file)