Exemple #1
0
def plot_amplitude(audiopath, title="", duration=3, plotpath=None):
    """ Plots the amplitude of an audio signal over time.

    Reads the file at ``audiopath``, keeps the first ``duration`` seconds
    of samples and draws them with one x tick every half second.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the plot.
        duration: number of seconds to plot, counted from the start
            (used as an integer in the slice and tick computation).
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if ``samples.size / samplerate`` is below ``duration``.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # Compare against the requested duration, not a fixed 3 seconds,
    # so the guard stays correct when a caller overrides ``duration``.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")

    samples = samples[0:samplerate * duration]

    _pl.figure(figsize=(10, 3))
    _pl.plot(samples)
    _pl.title(title)

    # Tick positions are sample indices, half a second apart.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    # Labels convert the sample index back to seconds.
    _pl.xticks(xlocs, ["%.2f" % (l / samplerate) for l in xlocs])

    _pl.ylabel("Amplitude")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #2
0
def plot_amplitude(audiopath, title="", duration=3, plotpath=None):
    """ Plots the amplitude of an audio signal over time.

    The first ``duration`` seconds of the file at ``audiopath`` are drawn
    as raw samples, with an x tick every half second labelled in seconds.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the plot.
        duration: number of seconds to plot, counted from the start
            (used as an integer in the slice and tick computation).
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if ``samples.size / samplerate`` is below ``duration``.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # The minimum length follows ``duration``; a fixed 3 second check
    # would be wrong as soon as a caller asks for another duration.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")

    samples = samples[0:samplerate * duration]

    _pl.figure(figsize=(10, 3))
    _pl.plot(samples)
    _pl.title(title)

    # Tick positions are expressed in samples, two per second.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    _pl.xticks(xlocs, ["%.2f" % (l / samplerate) for l in xlocs])

    _pl.ylabel("Amplitude")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #3
0
def plotstft(audiopath="wave.npz",
             binsize=1470,
             guidelines=False,
             plotpath=None,
             colormap="jet"):
    """ Draws the spectrogram of the audio file at ``audiopath``.

    The samples go through ``stft`` and ``logscale_spec``; the result is
    converted to decibels and shown as an image with time on the x axis
    and the lowest fifth of the frequency bins on the y axis.

    When ``guidelines`` is truthy, every eighth time bin has the bins
    returned by ``_pda.note_bins`` overwritten with the image minimum.
    The figure is saved to ``plotpath`` when given, otherwise shown.
    """
    import soundfiles as sf
    samplerate, samples = sf.readfile(audiopath)

    spectrum = stft(samples, binsize)
    scaled, freq = logscale_spec(spectrum, factor=1.0, sr=samplerate)

    # amplitude to decibel
    ims = 20. * _np.log10(_np.abs(scaled) / 10e-6)
    timebins, freqbins = _np.shape(ims)

    if guidelines:
        floor = _np.min(ims)
        notebins = _pda.note_bins(_mt.notes, binsize)
        # Rows 0, 8, 16, ... limited to len(ims) // 8 rows in total.
        for row in range(0, 8 * (len(ims) // 8), 8):
            for k in range(len(notebins)):
                ims[row][notebins[k]] = floor

    _pl.figure(figsize=(15, 7.5))
    _pl.imshow(_np.transpose(ims),
               origin="lower",
               aspect="auto",
               cmap=colormap,
               interpolation="none")
    _pl.colorbar()

    _pl.xlabel("Time (s)")
    _pl.ylabel("Frequency (Hz)")
    _pl.xlim([0, timebins - 1])
    _pl.ylim([0, 0.2 * freqbins])

    # 20 evenly spaced ticks per axis.
    xlocs = _np.float32(_np.linspace(0, timebins - 1, 20))
    seconds = ((xlocs * samples.size / timebins) +
               (0.5 * binsize)) / samplerate
    _pl.xticks(xlocs, ["%.02f" % l for l in seconds])

    ylocs = _np.int16(_np.round(_np.linspace(0, 0.2 * freqbins - 1, 20)))
    _pl.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    if not plotpath:
        _pl.show()
    else:
        _pl.savefig(plotpath, bbox_inches="tight")

    _pl.clf()
Exemple #4
0
def plothps(audiopath,
            title="Harmonic Product Spectrum",
            horizontal_harmonics=7,
            plotpath=None):
    """ Plots a visual representation of the HPS with 3 harmonics.

    Four stacked axes are drawn: the dB spectrum, the spectrum decimated
    by 2, the spectrum decimated by 3, and the mean of the three.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the top axis.
        horizontal_harmonics: number of multiples of the strongest bin
            shown along the x axis.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # FFT length equals the sample rate, which makes each bin 1 Hz wide.
    X = _np.fft.fft(samples, samplerate)

    # amplitude to decibel
    dBX = 20. * _np.log10(_np.abs(X) / 10e-6) - 120

    # remove mirror; floor division keeps the slice bound an integer
    # (a float bound raises TypeError on Python 3).
    dBX = dBX[0:dBX.size // 2]

    _, (ax0, ax1, ax2, ax3) = _pl.subplots(4, sharex=True, sharey=True)
    axs = (ax0, ax1, ax2, ax3)

    # Accumulate the three decimated spectra; shorter ones only
    # contribute to the leading part of the accumulator.
    total = _np.zeros_like(dBX)
    for i in range(3):
        dec = _sig.decimate(dBX, i + 1)
        total[:dec.size] += dec
        axs[i].plot(dec, 'b')

    mean_spectrum = _np.divide(total, 3)
    ax3.plot(mean_spectrum, 'b')

    ax0.set_title(title)

    # Ticks sit on multiples of the strongest bin of the mean spectrum.
    reference = _np.argmax(mean_spectrum)
    xlocs = _np.float32(
        [n * reference for n in range(1 + horizontal_harmonics)])
    ax3.set_xlabel("Frequency (Hz)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.0f" % l for l in xlocs])

    ax0.set_ylabel("Amplitude (dB)")
    ax1.set_ylabel("Decimated by 2")
    ax2.set_ylabel("Decimated by 3")
    ax3.set_ylabel("Mean")
    ax3.set_ylim([40, 1.15 * _np.max(mean_spectrum)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #5
0
def plot_noise(audiopath, windowsize=735, title="", plotpath=None):
    """ Plots windowed RMS power against a noise threshold.

    The start of the signal (3.5 seconds plus one window) is cut into
    consecutive windows of ``windowsize`` samples and the RMS of each
    window is plotted. Two horizontal lines are added: the value found at
    98% of the sorted RMS values of the first 3 seconds, and 1.5 times
    that value, labelled as the noise threshold.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        windowsize: number of samples per RMS window.
        title: title drawn above the plot.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if the input holds fewer than 3.5 seconds of samples.
    """
    samplerate, samples = _sf.readfile(audiopath)

    if samples.size < 3.5 * samplerate:
        raise Exception("Input is too short.")

    # 3.5 * samplerate is a float; slice bounds must be integers.
    samples = samples[0:int(windowsize + 3.5 * samplerate)]

    windows = samples.size // windowsize

    rms = _np.zeros(windows)
    for i in range(windows):
        w = samples[i * windowsize:(i + 1) * windowsize]
        rms[i] = _np.sqrt(_np.mean(_np.square(w)))

    # Sorted copy of the RMS values covering the first 3 seconds.
    first3seconds = _np.sort(rms[0:(3 * samplerate // windowsize)])
    pct98 = first3seconds[int(0.98 * first3seconds.size)]

    a_pct98 = _np.repeat(pct98, rms.size)
    a_noise = _np.repeat(1.5 * pct98, rms.size)

    _pl.figure(figsize=(10, 3))
    _pl.title(title)
    _pl.plot(rms, 'r', label='RMS Power')
    _pl.plot(a_pct98, 'g', label='98 percentile')
    _pl.plot(a_noise, 'b', label='noise threshold')
    _pl.legend(loc=2)

    _pl.xlabel("Time (seconds)")
    # One tick every half second, positioned in units of windows.
    xlocs = _np.int32([
        n * samplerate / (2 * windowsize)
        for n in range(1 + 2 * samples.size // samplerate)
    ])
    xlabels = ["%.1f" % (0.5 * int(n)) for n in range(xlocs.size)]
    _pl.xlim(0, 3.5 * samplerate // windowsize)
    _pl.xticks(xlocs, xlabels)

    _pl.ylim([0, 2 * _np.max(first3seconds)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #6
0
def plot_noise(audiopath, windowsize=735, title="", plotpath=None):
    """ Plots windowed RMS power together with a noise threshold.

    RMS is computed over consecutive windows of ``windowsize`` samples
    taken from the start of the signal (3.5 seconds plus one window).
    The plot also shows the value found at 98% of the sorted RMS values
    of the first 3 seconds, and 1.5 times that value as noise threshold.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        windowsize: number of samples per RMS window.
        title: title drawn above the plot.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if the input holds fewer than 3.5 seconds of samples.
    """
    samplerate, samples = _sf.readfile(audiopath)

    if samples.size < 3.5*samplerate:
        raise Exception("Input is too short.")

    # The bound contains a float product, so truncate it explicitly:
    # slicing with a float raises TypeError.
    samples = samples[0:int(windowsize + 3.5*samplerate)]

    windows = samples.size//windowsize

    rms = _np.zeros(windows)
    for i in range(windows):
        w = samples[i*windowsize:(i+1)*windowsize]
        rms[i] = _np.sqrt(_np.mean(_np.square(w)))

    # Sorted copy of the RMS values covering the first 3 seconds.
    first3seconds = _np.sort(rms[0:(3*samplerate//windowsize)])
    pct98 = first3seconds[int(0.98*first3seconds.size)]

    a_pct98 = _np.repeat(pct98, rms.size)
    a_noise = _np.repeat(1.5*pct98, rms.size)

    _pl.figure(figsize=(10, 3))
    _pl.title(title)
    _pl.plot(rms, 'r', label='RMS Power')
    _pl.plot(a_pct98, 'g', label='98 percentile')
    _pl.plot(a_noise, 'b', label='noise threshold')
    _pl.legend(loc=2)

    _pl.xlabel("Time (seconds)")
    # One tick every half second, positioned in units of windows.
    xlocs = _np.int32([n*samplerate/(2*windowsize) for n in range(1 + 2*samples.size//samplerate)])
    xlabels = ["%.1f" % (0.5*int(n)) for n in range(xlocs.size)]
    _pl.xlim(0, 3.5*samplerate//windowsize)
    _pl.xticks(xlocs, xlabels)

    _pl.ylim([0, 2*_np.max(first3seconds)])

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #7
0
def plotstft(audiopath="wave.npz", binsize=1470, guidelines=False, plotpath=None, colormap="jet"):
    """ Renders the spectrogram of the audio file at ``audiopath``.

    The output of ``stft`` is passed through ``logscale_spec``, turned
    into decibels and displayed as an image (time on x, the lowest fifth
    of the frequency bins on y). With ``guidelines`` set, every eighth
    time bin gets the bins from ``_pda.note_bins`` painted with the image
    minimum. The figure goes to ``plotpath`` if given, else to screen.
    """
    import soundfiles as sf

    samplerate, samples = sf.readfile(audiopath)

    transformed = stft(samples, binsize)
    sshow, freq = logscale_spec(transformed, factor=1.0, sr=samplerate)

    # amplitude to decibel
    magnitude = _np.abs(sshow)
    ims = 20.0 * _np.log10(magnitude / 10e-6)
    timebins, freqbins = _np.shape(ims)

    if guidelines:
        lowest = _np.min(ims)
        notebins = _pda.note_bins(_mt.notes, binsize)
        marked_rows = [8 * step for step in range(len(ims) // 8)]
        for row in marked_rows:
            for idx in range(len(notebins)):
                ims[row][notebins[idx]] = lowest

    _pl.figure(figsize=(15, 7.5))
    image = _np.transpose(ims)
    _pl.imshow(image, origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    _pl.colorbar()

    _pl.xlabel("Time (s)")
    _pl.ylabel("Frequency (Hz)")
    _pl.xlim([0, timebins - 1])
    _pl.ylim([0, 0.2 * freqbins])

    # Time axis: 20 ticks labelled in seconds.
    xlocs = _np.float32(_np.linspace(0, timebins - 1, 20))
    tick_seconds = ((xlocs * samples.size / timebins) + (0.5 * binsize)) / samplerate
    xlabels = ["%.02f" % l for l in tick_seconds]
    _pl.xticks(xlocs, xlabels)

    # Frequency axis: 20 ticks labelled from the ``freq`` table.
    ylocs = _np.int16(_np.round(_np.linspace(0, 0.2 * freqbins - 1, 20)))
    ylabels = ["%.02f" % freq[i] for i in ylocs]
    _pl.yticks(ylocs, ylabels)

    if not plotpath:
        _pl.show()
    else:
        _pl.savefig(plotpath, bbox_inches="tight")

    _pl.clf()
Exemple #8
0
def plot_tracking(audiopath,
                  title="",
                  binsize=1470,
                  tune=False,
                  plotpath=None,
                  repetitions=10):
    """ Plots the HPS tracking of an audio file.

    The signal is cut into consecutive bins of ``binsize`` samples,
    ``_hps.hps`` is evaluated on each bin and the results are plotted
    over time.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the plot.
        binsize: number of samples per detection.
        tune: when truthy, each detection is replaced by
            ``_mh.find_nearest_value(_mt.notes, f)``.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.
        repetitions: number of plotted points per detection.

    Raises:
        ValueError: if the input is shorter than one bin.
    """
    samplerate, samples = _sf.readfile(audiopath)

    detections = samples.size // binsize
    if detections == 0:
        # Without this guard the y-limit computation below fails on an
        # empty array with a far less helpful message.
        raise ValueError("Input too short")

    # One value per bin; the result of each detection must be stored,
    # otherwise the plotted curve stays at zero.
    p = _np.zeros(detections)
    for i in range(detections):
        f = _hps.hps(samples[i * binsize:(i + 1) * binsize])

        if tune:
            f = _mh.find_nearest_value(_mt.notes, f)

        p[i] = f

    # Stretch every detection over ``repetitions`` plotted points.
    p = _np.repeat(p, repetitions)

    _pl.plot(p)
    _pl.title(title)

    # The x axis spans all plotted points, i.e. repetitions * detections.
    xlocs = _np.linspace(0, repetitions * detections, 5)
    _pl.xlabel("Time (s)")
    _pl.xlim([0, _np.max(xlocs)])
    # Point index -> seconds: each point covers binsize / repetitions
    # samples. float() keeps the ratio exact under Python 2 as well.
    seconds_per_point = float(binsize) / (repetitions * samplerate)
    _pl.xticks(xlocs, [
        "%.2f" % l for l in _np.multiply(xlocs, seconds_per_point)
    ])

    _pl.ylabel("Fundamental Frequency (Hz)")
    _pl.ylim((0.9 * _np.min(p), 1.05 * _np.max(p)))

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #9
0
def plot_tonguing(audiopath, title="", duration=3, plotpath=None):
    """ Plots a visual representation of the tonguing detection algorithm.

    Four stacked axes show, for the first ``duration`` seconds: the raw
    signal, its absolute value, the output of ``_tong._envelope`` and the
    output of ``_tong._exponential_smoothing`` applied to that envelope.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the top axis.
        duration: number of seconds to plot, counted from the start
            (used as an integer in the slice and tick computation).
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if ``samples.size / samplerate`` is below ``duration``.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # Compare against the requested duration, not a fixed 3 seconds,
    # so the guard stays correct when a caller overrides ``duration``.
    if samples.size / samplerate < duration:
        raise Exception("Input too short")

    samples = samples[0:samplerate * duration]
    envelope = _tong._envelope(samples)
    # The smoothing is seeded with the mean of the first 50 samples.
    smooth = _tong._exponential_smoothing(envelope,
                                          x_s0=_np.mean(samples[0:50]))

    _, (ax0, ax1, ax2, ax3) = _pl.subplots(4, sharex=True)

    ax0.plot(samples)
    ax1.plot(_np.abs(samples))
    ax2.plot(envelope)
    ax3.plot(smooth)

    ax0.set_title(title)

    # Tick positions are sample indices, half a second apart; the x axis
    # is shared, so configuring the bottom axis is enough.
    xlocs = _np.float32([samplerate * i / 2 for i in range(2 * duration + 1)])
    ax3.set_xlabel("Time (s)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.2f" % (l / samplerate) for l in xlocs])

    ax0.set_ylabel("Signal")
    ax1.set_ylabel("Signal (Absolute)")
    ax2.set_ylabel("Hilbert Envelope")
    ax3.set_ylabel("Smoothed Envelope")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #10
0
def plot_tonguing(audiopath, title="", duration=3, plotpath=None):
    """ Plots a visual representation of the tonguing detection algorithm.

    For the first ``duration`` seconds of the file, four stacked axes
    show the raw signal, its absolute value, the result of
    ``_tong._envelope`` and that envelope after
    ``_tong._exponential_smoothing``.

    Args:
        audiopath: path handed to ``_sf.readfile``.
        title: title drawn above the top axis.
        duration: number of seconds to plot, counted from the start
            (used as an integer in the slice and tick computation).
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.

    Raises:
        Exception: if ``samples.size / samplerate`` is below ``duration``.
    """
    samplerate, samples = _sf.readfile(audiopath)

    # The minimum length follows ``duration``; a fixed 3 second check
    # would be wrong as soon as a caller asks for another duration.
    if samples.size/samplerate < duration:
        raise Exception("Input too short")

    samples = samples[0:samplerate*duration]
    envelope = _tong._envelope(samples)
    # The smoothing is seeded with the mean of the first 50 samples.
    smooth = _tong._exponential_smoothing(envelope, x_s0=_np.mean(samples[0:50]))

    _, (ax0, ax1, ax2, ax3) = _pl.subplots(4, sharex=True)

    ax0.plot(samples)
    ax1.plot(_np.abs(samples))
    ax2.plot(envelope)
    ax3.plot(smooth)

    ax0.set_title(title)

    # Tick positions are expressed in samples, two per second; the x axis
    # is shared, so only the bottom axis is configured.
    xlocs = _np.float32([samplerate*i/2 for i in range(2*duration + 1)])
    ax3.set_xlabel("Time (s)")
    ax3.set_xlim([0, _np.max(xlocs)])
    ax3.set_xticks(xlocs)
    ax3.set_xticklabels(["%.2f" % (l/samplerate) for l in xlocs])

    ax0.set_ylabel("Signal")
    ax1.set_ylabel("Signal (Absolute)")
    ax2.set_ylabel("Hilbert Envelope")
    ax3.set_ylabel("Smoothed Envelope")

    if plotpath:
        _pl.savefig(plotpath, bbox_inches="tight")
    else:
        _pl.show()

    _pl.clf()
Exemple #11
0
def plotfft(audiopath, audiopath2="", audiopath3="", binsize=44100, plotpath=None):
    """ Plot the FFT for up to 3 given audio file paths.

    Each file is reduced to its first ``binsize`` samples, transformed
    with an FFT of length ``binsize`` and drawn in decibels (blue, green
    and red for the first, second and third file).

    Args:
        audiopath: path of the first file, always plotted.
        audiopath2: optional second file; skipped when empty.
        audiopath3: optional third file; skipped when empty.
        binsize: number of samples used and FFT length.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.
    """
    # NOTE(review): plotting goes through `pl` while numpy and the file
    # reader are reached through underscore aliases; confirm `pl` is bound
    # at module level.

    def spectrum_db(path):
        """ Returns the non-mirrored half of the dB spectrum of ``path``. """
        _, samples = _sf.readfile(path)

        # Merge multiple channels
        if hasattr(samples[0], "__len__"):
            samples = _np.mean(samples, 1)

        X = _np.fft.fft(samples[0:binsize], binsize)

        # amplitude to decibel
        dBX = 20.0 * _np.log10(_np.abs(X) / 10e-6) - 120

        # remove mirror; floor division keeps the slice bound an integer
        # (a float bound raises TypeError on Python 3).
        return dBX[0 : dBX.size // 2]

    dBX = spectrum_db(audiopath)

    pl.figure(figsize=(15, 7.5))
    pl.plot(dBX, "b")

    dBX2 = None
    if audiopath2 != "":
        dBX2 = spectrum_db(audiopath2)
        pl.plot(dBX2, "g")

    if audiopath3 != "":
        pl.plot(spectrum_db(audiopath3), "r")

    pl.xlabel("Frequency (Hz)")
    pl.ylabel("Amplitude (dB)")
    pl.xlim([0, binsize])
    pl.ylim([0, _np.max(dBX)])

    # Use the index of the strongest bin as the reference; it is assumed
    # to correspond to the fundamental. The second file takes precedence
    # when one was given.
    reference = _np.argmax(dBX if dBX2 is None else dBX2)
    xlocs = _np.float32([n * reference for n in range(0, 50)])
    pl.xticks(xlocs, ["%.0f" % l for l in xlocs])

    if plotpath:
        pl.savefig(plotpath, bbox_inches="tight")
    else:
        pl.show()

    pl.clf()
Exemple #12
0
def plotfft(audiopath,
            audiopath2="",
            audiopath3="",
            binsize=44100,
            plotpath=None):
    """ Plot the FFT for up to 3 given audio file paths.

    For every given file the first ``binsize`` samples are transformed
    with an FFT of length ``binsize`` and the decibel spectrum is drawn
    (blue, green and red for the first, second and third file).

    Args:
        audiopath: path of the first file, always plotted.
        audiopath2: optional second file; skipped when empty.
        audiopath3: optional third file; skipped when empty.
        binsize: number of samples used and FFT length.
        plotpath: when truthy the figure is saved to this path, otherwise
            it is shown on screen.
    """
    # NOTE(review): plotting goes through `pl` while numpy and the file
    # reader are reached through underscore aliases; confirm `pl` is bound
    # at module level.

    def spectrum_db(path):
        """ Returns the non-mirrored half of the dB spectrum of ``path``. """
        _, samples = _sf.readfile(path)

        # Merge multiple channels
        if hasattr(samples[0], "__len__"):
            samples = _np.mean(samples, 1)

        X = _np.fft.fft(samples[0:binsize], binsize)

        # amplitude to decibel
        dBX = 20. * _np.log10(_np.abs(X) / 10e-6) - 120

        # remove mirror; the slice bound must be an integer, and true
        # division would produce a float (TypeError on Python 3).
        return dBX[0:dBX.size // 2]

    dBX = spectrum_db(audiopath)

    pl.figure(figsize=(15, 7.5))
    pl.plot(dBX, 'b')

    dBX2 = None
    if audiopath2 != "":
        dBX2 = spectrum_db(audiopath2)
        pl.plot(dBX2, 'g')

    if audiopath3 != "":
        pl.plot(spectrum_db(audiopath3), 'r')

    pl.xlabel("Frequency (Hz)")
    pl.ylabel("Amplitude (dB)")
    pl.xlim([0, binsize])
    pl.ylim([0, _np.max(dBX)])

    # Use the index of the strongest bin as the reference; it is assumed
    # to correspond to the fundamental. The second file takes precedence
    # when one was given.
    reference = _np.argmax(dBX if dBX2 is None else dBX2)
    xlocs = _np.float32([n * reference for n in range(0, 50)])
    pl.xticks(xlocs, ["%.0f" % l for l in xlocs])

    if plotpath:
        pl.savefig(plotpath, bbox_inches="tight")
    else:
        pl.show()

    pl.clf()