def plot_amplitude(audiopath, title="", duration=3, plotpath=None):
    """
    Plot the amplitude of an audio signal over time.

    Only the first ``duration`` seconds of the file are drawn, with an
    x-axis tick every half second.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the plot.
        duration: Number of seconds to plot, counted from the start of the
            file.  Must be an integer, since it is used in ``range()``.
        plotpath: If truthy, the figure is saved to this path; otherwise it
            is shown on screen.

    Raises:
        Exception: If the file holds fewer than ``duration`` seconds.
    """
    samplerate, samples = _sf.readfile(audiopath)
    # Validate against the requested duration (not a fixed 3 seconds) so the
    # check always matches the slice and the tick range used below.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")
    samples = samples[0:samplerate * duration]

    fig = _pl.figure(figsize=(10, 3))
    _pl.plot(samples)
    _pl.title(title)

    # One tick every half second, expressed in sample indices.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    _pl.xticks(xlocs, ["%.2f" % (l / samplerate) for l in xlocs])
    _pl.ylabel("Amplitude")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # Close (not just clear) the figure: pyplot keeps every figure alive
    # until it is closed, so repeated calls would otherwise accumulate them.
    _pl.close(fig)
def plot_amplitude(audiopath, title="", duration=3, plotpath=None):
    """
    Plot the amplitude of an audio signal over time.

    Only the first ``duration`` seconds of the file are drawn, with an
    x-axis tick every half second.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the plot.
        duration: Number of seconds to plot, counted from the start of the
            file.  Must be an integer, since it is used in ``range()``.
        plotpath: If truthy, the figure is saved to this path; otherwise it
            is shown on screen.

    Raises:
        Exception: If the file holds fewer than ``duration`` seconds.
    """
    samplerate, samples = _sf.readfile(audiopath)
    # The minimum length follows the requested duration instead of a fixed
    # 3 seconds, keeping the check consistent with the slice below.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")
    samples = samples[0:samplerate * duration]

    fig = _pl.figure(figsize=(10, 3))
    _pl.plot(samples)
    _pl.title(title)

    # Tick positions in sample indices, spaced half a second apart.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    _pl.xticks(xlocs, ["%.2f" % (l / samplerate) for l in xlocs])
    _pl.ylabel("Amplitude")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # pyplot retains figures until they are closed; clearing alone would
    # leave one more open figure behind on every call.
    _pl.close(fig)
def plotstft(audiopath="wave.npz", binsize=1470, guidelines=False, plotpath=None, colormap="jet"):
    """
    Draw the spectrogram of an audio file.

    The samples are passed through ``stft`` and ``logscale_spec`` (both
    defined outside this function), converted to decibels and rendered as
    an image with time on the x-axis and frequency on the y-axis.  Only the
    lowest 20% of the frequency bins are shown.

    Args:
        audiopath: Path handed to ``soundfiles.readfile``.
        binsize: Passed to ``stft`` and used to offset the time labels.
        guidelines: If true, the note bins of every 8th time frame are
            overwritten with the minimum level so they show up as marks.
        plotpath: If truthy, the figure is saved there; otherwise shown.
        colormap: Matplotlib colormap name used for the image.
    """
    import soundfiles as sf

    samplerate, samples = sf.readfile(audiopath)
    spectrum = stft(samples, binsize)
    scaled, freq = logscale_spec(spectrum, factor=1.0, sr=samplerate)

    # amplitude to decibel
    magnitude = _np.abs(scaled)
    ims = 20. * _np.log10(magnitude / 10e-6)
    timebins, freqbins = _np.shape(ims)

    if guidelines:
        floor_level = _np.min(ims)
        notebins = _pda.note_bins(_mt.notes, binsize)
        marked_frames = len(ims) // 8
        for frame in range(marked_frames):
            row = frame * 8
            for k in range(len(notebins)):
                ims[row][notebins[k]] = floor_level

    _pl.figure(figsize=(15, 7.5))
    _pl.imshow(
        _np.transpose(ims),
        origin="lower",
        aspect="auto",
        cmap=colormap,
        interpolation="none",
    )
    _pl.colorbar()

    _pl.xlabel("Time (s)")
    _pl.ylabel("Frequency (Hz)")
    _pl.xlim([0, timebins - 1])
    _pl.ylim([0, 0.2 * freqbins])

    # 20 evenly spaced time ticks, labelled in seconds.
    xlocs = _np.float32(_np.linspace(0, timebins - 1, 20))
    seconds = ((xlocs * samples.size / timebins) + (0.5 * binsize)) / samplerate
    time_labels = ["%.02f" % l for l in seconds]
    _pl.xticks(xlocs, time_labels)

    # 20 evenly spaced frequency ticks, labelled from the `freq` table.
    ylocs = _np.int16(_np.round(_np.linspace(0, 0.2 * freqbins - 1, 20)))
    freq_labels = ["%.02f" % freq[i] for i in ylocs]
    _pl.yticks(ylocs, freq_labels)

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    _pl.clf()
def plothps(audiopath, title="Harmonic Product Spectrum", horizontal_harmonics=7, plotpath=None):
    """
    Plot a visual representation of the HPS with 3 harmonics.

    Four stacked subplots sharing both axes are drawn: the dB spectrum, the
    spectrum decimated by 2, the spectrum decimated by 3, and the mean of
    the three.  X-axis ticks are placed at multiples of the index where the
    mean is largest.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the top subplot.
        horizontal_harmonics: Number of multiples of the peak index shown
            on the x-axis; it also sets the right-hand x limit.
        plotpath: If truthy, the figure is saved there; otherwise shown.
    """
    samplerate, samples = _sf.readfile(audiopath)
    # The FFT length equals the sample rate, so each bin is 1 Hz wide.
    X = _np.fft.fft(samples, samplerate)
    # amplitude to decibel
    dBX = 20. * _np.log10(_np.abs(X) / 10e-6) - 120
    # remove mirror; floor division keeps the slice bound an integer
    dBX = dBX[0:dBX.size // 2]

    fig, axs = _pl.subplots(4, sharex=True, sharey=True)
    ax0, ax1, ax2, ax3 = axs

    # Accumulate the spectrum decimated by 1, 2 and 3; each decimated copy
    # is shorter, so it only contributes to the leading bins.
    mean_spectrum = _np.zeros_like(dBX)
    for i in range(3):
        dec = _sig.decimate(dBX, i + 1)
        mean_spectrum[:dec.size] += dec
        axs[i].plot(dec, 'b')
    mean_spectrum = _np.divide(mean_spectrum, 3)
    ax3.plot(mean_spectrum, 'b')

    ax0.set_title(title)
    reference = _np.argmax(mean_spectrum)
    xlocs = _np.float32(
        [n * reference for n in range(1 + horizontal_harmonics)])
    ax3.set_xlabel("Frequency (Hz)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.0f" % l for l in xlocs])

    ax0.set_ylabel("Amplitude (dB)")
    ax1.set_ylabel("Decimated by 2")
    ax2.set_ylabel("Decimated by 3")
    ax3.set_ylabel("Mean")
    ax3.set_ylim([40, 1.15 * _np.max(mean_spectrum)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # Close the figure so repeated calls do not accumulate open figures.
    _pl.close(fig)
def plot_noise(audiopath, windowsize=735, title="", plotpath=None):
    """
    Plot the windowed RMS power of a recording with its noise threshold.

    The first 3.5 seconds (plus one window) of the file are split into
    consecutive windows of ``windowsize`` samples and the RMS of each
    window is plotted.  Two horizontal lines are added: the value found at
    the 98% position of the sorted RMS values from the first 3 seconds, and
    1.5 times that value, labelled "noise threshold".

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        windowsize: Number of samples per RMS window.
        title: Title placed above the plot.
        plotpath: If truthy, the figure is saved there; otherwise shown.

    Raises:
        Exception: If the file holds fewer than 3.5 seconds of samples.
    """
    samplerate, samples = _sf.readfile(audiopath)
    if samples.size < 3.5 * samplerate:
        raise Exception("Input is too short.")
    # 3.5 * samplerate is a float; slice bounds must be integers.
    samples = samples[0:int(windowsize + 3.5 * samplerate)]

    windows = samples.size // windowsize
    rms = _np.zeros(windows)
    for i in range(windows):
        w = samples[i * windowsize:(i + 1) * windowsize]
        rms[i] = _np.sqrt(_np.mean(_np.square(w)))

    # Sort a copy of the windows covering the first 3 seconds and read the
    # entry at the 98% position.
    first3seconds = _np.copy(rms[0:(3 * samplerate // windowsize)])
    first3seconds.sort()
    pct98 = first3seconds[int(0.98 * first3seconds.size)]
    a_pct98 = _np.repeat(pct98, rms.size)
    a_noise = _np.repeat(1.5 * pct98, rms.size)

    fig = _pl.figure(figsize=(10, 3))
    _pl.title(title)
    _pl.plot(rms, 'r', label='RMS Power')
    _pl.plot(a_pct98, 'g', label='98 percentile')
    _pl.plot(a_noise, 'b', label='noise threshold')
    _pl.legend(loc=2)
    _pl.xlabel("Time (seconds)")

    # One tick every half second, expressed in window indices.
    xlocs = _np.int32([
        n * samplerate / (2 * windowsize)
        for n in range(1 + 2 * samples.size // samplerate)
    ])
    xlabels = ["%.1f" % (0.5 * int(n)) for n in range(xlocs.size)]
    _pl.xlim(0, 3.5 * samplerate // windowsize)
    _pl.xticks(xlocs, xlabels)
    _pl.ylim([0, 2 * _np.max(first3seconds)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # Close the figure so repeated calls do not accumulate open figures.
    _pl.close(fig)
def plot_noise(audiopath, windowsize=735, title="", plotpath=None):
    """
    Plot the windowed RMS power of a recording with its noise threshold.

    The first 3.5 seconds (plus one window) of the file are split into
    consecutive windows of ``windowsize`` samples and the RMS of each
    window is plotted.  Two horizontal lines are added: the value found at
    the 98% position of the sorted RMS values from the first 3 seconds, and
    1.5 times that value, labelled "noise threshold".

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        windowsize: Number of samples per RMS window.
        title: Title placed above the plot.
        plotpath: If truthy, the figure is saved there; otherwise shown.

    Raises:
        Exception: If the file holds fewer than 3.5 seconds of samples.
    """
    samplerate, samples = _sf.readfile(audiopath)
    if samples.size < 3.5*samplerate:
        raise Exception("Input is too short.")
    # The upper bound contains a float term, so convert it before slicing:
    # slice bounds must be integers.
    samples = samples[0:int(windowsize + 3.5*samplerate)]

    windows = samples.size//windowsize
    rms = _np.zeros(windows)
    for i in range(windows):
        w = samples[i*windowsize:(i+1)*windowsize]
        rms[i] = _np.sqrt(_np.mean(_np.square(w)))

    # Sorted copy of the RMS windows from the first 3 seconds; the entry at
    # the 98% position is the reference level.
    first3seconds = _np.copy(rms[0:(3*samplerate//windowsize)])
    first3seconds.sort()
    pct98 = first3seconds[int(0.98*first3seconds.size)]
    a_pct98 = _np.repeat(pct98, rms.size)
    a_noise = _np.repeat(1.5*pct98, rms.size)

    fig = _pl.figure(figsize=(10, 3))
    _pl.title(title)
    _pl.plot(rms, 'r', label='RMS Power')
    _pl.plot(a_pct98, 'g', label='98 percentile')
    _pl.plot(a_noise, 'b', label='noise threshold')
    _pl.legend(loc=2)
    _pl.xlabel("Time (seconds)")

    # Half-second ticks, expressed in window indices.
    xlocs = _np.int32([n*samplerate/(2*windowsize)
                       for n in range(1 + 2*samples.size//samplerate)])
    xlabels = ["%.1f" % (0.5*int(n)) for n in range(xlocs.size)]
    _pl.xlim(0, 3.5*samplerate//windowsize)
    _pl.xticks(xlocs, xlabels)
    _pl.ylim([0, 2*_np.max(first3seconds)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # pyplot retains figures until they are closed; close this one so
    # repeated calls do not pile them up.
    _pl.close(fig)
def plotstft(audiopath="wave.npz", binsize=1470, guidelines=False, plotpath=None, colormap="jet"):
    """
    Render the spectrogram of the given audio file.

    The samples go through ``stft`` and ``logscale_spec`` (both defined
    outside this function), are converted to decibels and displayed as an
    image: time along x, frequency along y, restricted to the lowest 20% of
    the frequency bins.

    Args:
        audiopath: Path handed to ``soundfiles.readfile``.
        binsize: Passed to ``stft`` and used to offset the time labels.
        guidelines: When true, the note bins of every 8th time frame are
            set to the minimum level of the image.
        plotpath: If truthy, the figure is saved there; otherwise shown.
        colormap: Matplotlib colormap name used for the image.
    """
    import soundfiles as sf

    samplerate, samples = sf.readfile(audiopath)
    transformed = stft(samples, binsize)
    sshow, freq = logscale_spec(transformed, factor=1.0, sr=samplerate)

    # amplitude to decibel
    ims = 20.0 * _np.log10(_np.abs(sshow) / 10e-6)
    timebins, freqbins = _np.shape(ims)

    if guidelines:
        lowest = _np.min(ims)
        notebins = _pda.note_bins(_mt.notes, binsize)
        note_count = len(notebins)
        for step in range(len(ims) // 8):
            frame = step * 8
            for idx in range(note_count):
                ims[frame][notebins[idx]] = lowest

    # Tick positions for both axes.
    xlocs = _np.float32(_np.linspace(0, timebins - 1, 20))
    ylocs = _np.int16(_np.round(_np.linspace(0, 0.2 * freqbins - 1, 20)))

    _pl.figure(figsize=(15, 7.5))
    _pl.imshow(_np.transpose(ims), origin="lower", aspect="auto",
               cmap=colormap, interpolation="none")
    _pl.colorbar()

    _pl.xlabel("Time (s)")
    _pl.ylabel("Frequency (Hz)")
    _pl.xlim([0, timebins - 1])
    _pl.ylim([0, 0.2 * freqbins])

    # Time labels are in seconds.
    tick_times = ((xlocs * samples.size / timebins) + (0.5 * binsize)) / samplerate
    _pl.xticks(xlocs, ["%.02f" % l for l in tick_times])
    _pl.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    if not plotpath:
        _pl.show()
    else:
        _pl.savefig(plotpath, bbox_inches="tight")
    _pl.clf()
def plot_tracking(audiopath, title="", binsize=1470, tune=False, plotpath=None, repetitions=10):
    """
    Plot the HPS tracking of an audio file.

    The samples are cut into consecutive chunks of ``binsize`` samples,
    ``_hps.hps`` is called on each chunk, and the resulting values are
    plotted against time.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the plot.
        binsize: Number of samples per detection chunk.
        tune: If true, each detected value is replaced by the result of
            ``_mh.find_nearest_value(_mt.notes, value)``.
        plotpath: If truthy, the figure is saved there; otherwise shown.
        repetitions: How many times each detection is repeated in the
            plotted series.

    Raises:
        ValueError: If the file holds fewer than ``binsize`` samples.
    """
    samplerate, samples = _sf.readfile(audiopath)
    detections = samples.size // binsize
    if detections == 0:
        # Without a single full chunk the y limits below cannot be computed.
        raise ValueError("Input too short")

    # One slot per detection; the series is widened by `repetitions` below.
    p = _np.zeros(detections)
    for i in range(detections):
        # NOTE(review): assumes _hps.hps returns a scalar frequency -- confirm.
        f = _hps.hps(samples[i * binsize:(i + 1) * binsize])
        if tune:
            f = _mh.find_nearest_value(_mt.notes, f)
        # Store the detection; otherwise the plot stays at zero throughout.
        p[i] = f
    p = _np.repeat(p, repetitions)

    _pl.plot(p)
    _pl.title(title)

    # The tick range spans the repeated series, so it scales with
    # `repetitions` like the label conversion below.
    xlocs = _np.linspace(0, repetitions * detections, 5)
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    _pl.xticks(xlocs, [
        "%.2f" % l
        for l in _np.multiply(xlocs, binsize / (repetitions * samplerate))
    ])
    _pl.ylabel("Fundamental Frequency (Hz)")
    _pl.ylim((0.9 * _np.min(p), 1.05 * _np.max(p)))

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    _pl.clf()
def plot_tonguing(audiopath, title="", duration=3, plotpath=None):
    """
    Plot a visual representation of the tonguing detection algorithm.

    Four stacked subplots sharing the x-axis are drawn for the first
    ``duration`` seconds of the file: the raw signal, its absolute value,
    the result of ``_tong._envelope`` and the result of
    ``_tong._exponential_smoothing`` applied to that envelope.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the top subplot.
        duration: Number of seconds to plot, counted from the start of the
            file.  Must be an integer, since it is used in ``range()``.
        plotpath: If truthy, the figure is saved there; otherwise shown.

    Raises:
        Exception: If the file holds fewer than ``duration`` seconds.
    """
    samplerate, samples = _sf.readfile(audiopath)
    # Validate against the requested duration (not a fixed 3 seconds) so the
    # check always matches the slice and the tick range used below.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")
    samples = samples[0:samplerate * duration]

    envelope = _tong._envelope(samples)
    # The smoothing is seeded with the mean of the first 50 samples.
    smooth = _tong._exponential_smoothing(envelope, x_s0=_np.mean(samples[0:50]))

    fig, (ax0, ax1, ax2, ax3) = _pl.subplots(4, sharex=True)
    ax0.plot(samples)
    ax1.plot(_np.abs(samples))
    ax2.plot(envelope)
    ax3.plot(smooth)
    ax0.set_title(title)

    # One tick every half second, expressed in sample indices.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    ax3.set_xlabel("Time (s)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.2f" % (l / samplerate) for l in xlocs])

    ax0.set_ylabel("Signal")
    ax1.set_ylabel("Signal (Absolute)")
    ax2.set_ylabel("Hilbert Envelope")
    ax3.set_ylabel("Smoothed Envelope")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # Close the figure so repeated calls do not accumulate open figures.
    _pl.close(fig)
def plot_tonguing(audiopath, title="", duration=3, plotpath=None):
    """
    Plot a visual representation of the tonguing detection algorithm.

    Four stacked subplots sharing the x-axis are drawn for the first
    ``duration`` seconds of the file: the raw signal, its absolute value,
    the result of ``_tong._envelope`` and the result of
    ``_tong._exponential_smoothing`` applied to that envelope.

    Args:
        audiopath: Path handed to ``_sf.readfile``, which is unpacked as
            ``(samplerate, samples)``.
        title: Title placed above the top subplot.
        duration: Number of seconds to plot, counted from the start of the
            file.  Must be an integer, since it is used in ``range()``.
        plotpath: If truthy, the figure is saved there; otherwise shown.

    Raises:
        Exception: If the file holds fewer than ``duration`` seconds.
    """
    samplerate, samples = _sf.readfile(audiopath)
    # The minimum length follows the requested duration instead of a fixed
    # 3 seconds, keeping the check consistent with the slice below.
    if samples.size/samplerate < duration:
        raise Exception("Input too short")
    samples = samples[0:samplerate*duration]

    envelope = _tong._envelope(samples)
    # Seed the smoothing with the mean of the first 50 samples.
    smooth = _tong._exponential_smoothing(envelope, x_s0=_np.mean(samples[0:50]))

    fig, (ax0, ax1, ax2, ax3) = _pl.subplots(4, sharex=True)
    ax0.plot(samples)
    ax1.plot(_np.abs(samples))
    ax2.plot(envelope)
    ax3.plot(smooth)
    ax0.set_title(title)

    # Tick positions in sample indices, spaced half a second apart.
    xlocs = _np.float32([samplerate*i/2 for i in range(2*duration + 1)])
    ax3.set_xlabel("Time (s)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.2f" % (l/samplerate) for l in xlocs])

    ax0.set_ylabel("Signal")
    ax1.set_ylabel("Signal (Absolute)")
    ax2.set_ylabel("Hilbert Envelope")
    ax3.set_ylabel("Smoothed Envelope")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # pyplot retains figures until they are closed; close this one so
    # repeated calls do not pile them up.
    _pl.close(fig)
def plotfft(audiopath, audiopath2="", audiopath3="", binsize=44100, plotpath=None):
    """
    Plot the FFT for up to 3 given audio file paths.

    For each file, multiple channels are averaged, the first ``binsize``
    samples are transformed with an FFT of length ``binsize``, and the
    lower half of the dB magnitude spectrum is drawn (blue, green and red
    for the first, second and third file).

    Args:
        audiopath: Path of the first file, handed to ``_sf.readfile``.
        audiopath2: Optional path of a second file; ``""`` skips it.
        audiopath3: Optional path of a third file; ``""`` skips it.
        binsize: Number of samples used and FFT length.  The x-axis is
            labelled in Hz, which matches the bin index only when
            ``binsize`` equals the sample rate.
        plotpath: If truthy, the figure is saved there; otherwise shown.
    """

    def half_spectrum_db(path):
        # Read one file and return the lower half of its dB spectrum.
        _, samples = _sf.readfile(path)
        # Merge multiple channels
        if hasattr(samples[0], "__len__"):
            samples = _np.mean(samples, 1)
        X = _np.fft.fft(samples[0:binsize], binsize)
        # amplitude to decibel
        dBX = 20.0 * _np.log10(_np.abs(X) / 10e-6) - 120
        # remove mirror; floor division keeps the slice bound an integer
        return dBX[0:dBX.size // 2]

    dBX = half_spectrum_db(audiopath)
    # `_pl` is the pyplot alias used by the rest of this module.
    fig = _pl.figure(figsize=(15, 7.5))
    _pl.plot(dBX, "b")

    dBX2 = None
    if audiopath2 != "":
        dBX2 = half_spectrum_db(audiopath2)
        _pl.plot(dBX2, "g")
    if audiopath3 != "":
        _pl.plot(half_spectrum_db(audiopath3), "r")

    _pl.xlabel("Frequency (Hz)")
    _pl.ylabel("Amplitude (dB)")
    _pl.xlim([0, binsize])
    _pl.ylim([0, _np.max(dBX)])

    # Use the highest index as the reference.
    # We assume the highest index corresponds to the fundamental.
    # When a second file is given, its spectrum provides the reference.
    reference = _np.argmax(dBX if audiopath2 == "" else dBX2)
    xlocs = _np.float32([n * reference for n in range(0, 50)])
    _pl.xticks(xlocs, ["%.0f" % l for l in xlocs])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # Close the figure so repeated calls do not accumulate open figures.
    _pl.close(fig)
def plotfft(audiopath, audiopath2="", audiopath3="", binsize=44100, plotpath=None):
    """
    Plot the FFT for up to 3 given audio file paths.

    For each file, multiple channels are averaged, the first ``binsize``
    samples are transformed with an FFT of length ``binsize``, and the
    lower half of the dB magnitude spectrum is drawn (blue, green and red
    for the first, second and third file).

    Args:
        audiopath: Path of the first file, handed to ``_sf.readfile``.
        audiopath2: Optional path of a second file; ``""`` skips it.
        audiopath3: Optional path of a third file; ``""`` skips it.
        binsize: Number of samples used and FFT length.  The x-axis is
            labelled in Hz, which matches the bin index only when
            ``binsize`` equals the sample rate.
        plotpath: If truthy, the figure is saved there; otherwise shown.
    """

    def spectrum_db(path):
        # Load one file and compute the non-mirrored half of its dB spectrum.
        _, samples = _sf.readfile(path)
        # Merge multiple channels
        if hasattr(samples[0], "__len__"):
            samples = _np.mean(samples, 1)
        samples = samples[0:binsize]
        X = _np.fft.fft(samples, binsize)
        # amplitude to decibel
        dBX = 20. * _np.log10(_np.abs(X) / 10e-6) - 120
        # remove mirror; the slice bound must be an integer, hence `//`
        return dBX[0:dBX.size // 2]

    dBX = spectrum_db(audiopath)
    # Plot through `_pl`, the pyplot alias used by the rest of this module.
    fig = _pl.figure(figsize=(15, 7.5))
    _pl.plot(dBX, 'b')

    dBX2 = None
    if audiopath2 != "":
        dBX2 = spectrum_db(audiopath2)
        _pl.plot(dBX2, 'g')
    if audiopath3 != "":
        dBX3 = spectrum_db(audiopath3)
        _pl.plot(dBX3, 'r')

    _pl.xlabel("Frequency (Hz)")
    _pl.ylabel("Amplitude (dB)")
    _pl.xlim([0, binsize])
    _pl.ylim([0, _np.max(dBX)])

    # Use the highest index as the reference.
    # We assume the highest index corresponds to the fundamental.
    # The second file's spectrum is used for the reference when it is given.
    reference = _np.argmax(dBX if audiopath2 == "" else dBX2)
    xlocs = _np.float32([n * reference for n in range(0, 50)])
    _pl.xticks(xlocs, ["%.0f" % l for l in xlocs])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()
    # pyplot retains figures until they are closed; close this one so
    # repeated calls do not pile them up.
    _pl.close(fig)