Ejemplo n.º 1
0
    def detect_peaks(self, min_peak_ratio=0.15):
        """--------------------------------------------------------------------
        Finds the peak indices of the distribution. These are treated as tonic
        candidates in higher order functions.

        min_peak_ratio: The minimum ratio between the max peak value and the
                        value of a detected peak. Must lie in [0, 1]; 0 keeps
                        all peaks and 1 keeps only the highest peak.

        Returns a tuple (peak_inds, peak_vals): the indices of the retained
        peaks in self.bins and the corresponding peak values. Both are empty
        arrays when no peak is detected.
        --------------------------------------------------------------------"""
        assert 1 >= min_peak_ratio >= 0, \
            'min_peak_ratio should be between 0 (keep all peaks) and ' \
            '1 (keep only the highest peak)'

        # Peak detection is handled by Essentia
        detector = std.PeakDetection()
        peak_bins, peak_vals = detector(essentia.array(self.vals))

        # Without any peak there is nothing to index or normalize; bail out
        # before peak_inds[0] / max(peak_vals) can raise.
        if len(peak_bins) == 0:
            return np.array([], dtype=int), np.asarray(peak_vals)

        # Essentia normalizes the positions to 1, they are converted here
        # to actual index values to be used in bins.
        last_ind = len(self.bins) - 1
        peak_inds = np.array([int(round(bn * last_ind)) for bn in peak_bins])

        # If the object is a pcd and there are peaks at both the zeroth and
        # the last index, they describe the same peak because a pcd is
        # circular: keep only the higher of the two. The last peak is only
        # treated as the duplicate when it really sits on the last bin.
        wraps_around = (len(peak_inds) > 1 and peak_inds[0] == 0 and
                        peak_inds[-1] == last_ind)
        if self.is_pcd() and wraps_around:
            if peak_vals[0] >= peak_vals[-1]:
                peak_inds = peak_inds[:-1]
                peak_vals = peak_vals[:-1]
            else:
                peak_inds = peak_inds[1:]
                peak_vals = peak_vals[1:]

        # remove peaks lower than the min_peak_ratio
        peak_bool = peak_vals / max(peak_vals) >= min_peak_ratio

        return peak_inds[peak_bool], peak_vals[peak_bool]
Ejemplo n.º 2
0
def vibFreq(pitchtrack, sp, hopsize):
    '''
    Estimate the strongest modulation frequency of a pitch track.

    The track is zero-padded to the next power of two, its spectrum is
    computed with Essentia and the strongest spectral peak is converted to a
    frequency using the pitch-track rate ``sp / hopsize``.

    :param pitchtrack: numpy array holding the pitch track
    :param sp: samplerate of wave audio
    :param hopsize: hop size used to extract the pitch track
    :return: frequency of the strongest spectral peak (up to 3 peaks are
             detected, only the strongest one is returned); 0.0 when the
             pitch track is empty or no peak is found
    '''
    ptlen = len(pitchtrack)
    if ptlen == 0:
        # Nothing to analyse (the original crashed on log(0) here).
        return 0.0

    if pitchtrack.dtype != np.float32:
        pitchtrack = pitchtrack.astype(np.float32)

    # Force true division so integer arguments are not truncated on Python 2.
    sampleRate = sp / float(hopsize)

    # Next power of two >= ptlen, computed on integers to avoid the rounding
    # errors of the log-based formulation.
    fftSize = 1 << (ptlen - 1).bit_length()

    pitchtrackPad = pitchtrack
    if ptlen < fftSize:
        pitchtrackPad = np.append(pitchtrack,
                                  np.zeros(fftSize - ptlen, dtype=np.float32))

    S = ess.Spectrum(size=fftSize)(pitchtrackPad)
    locs, amps = ess.PeakDetection(maxPeaks=3, orderBy='amplitude')(S)
    if len(locs) == 0:
        return 0.0

    # NOTE(review): Essentia positions are normalized so that 1.0 is the last
    # bin, which would suggest scaling by fftSize // 2 rather than
    # fftSize // 2 + 1 - confirm before changing the numeric output.
    freqs = locs * (fftSize // 2 + 1) * sampleRate / fftSize

    return freqs[0]
Ejemplo n.º 3
0
	def detect_peaks(self):
		"""Detect the peaks of ``self.vals`` with Essentia.

		Returns a tuple (peak_idxs, peak_vals): ``peak_idxs`` is an integer
		numpy array of indices into ``self.bins`` and ``peak_vals`` holds the
		matching peak values. Both are empty when no peak is found.
		"""
		detector = std.PeakDetection()
		peak_bins, peak_vals = detector(essentia.array(self.vals))
		# Essentia normalizes the positions to 1; scale them back to integer
		# indices so they can be used directly for indexing.
		last_idx = len(self.bins) - 1
		peak_idxs = np.array([int(round(bn * last_idx)) for bn in peak_bins],
							 dtype=int)
		# NOTE(review): when the first peak is at index 0 the last peak is
		# dropped - presumably the wrapped-around duplicate of a circular
		# distribution; confirm against the enclosing class.
		# The length check keeps an empty detection from raising IndexError.
		if len(peak_idxs) > 0 and peak_idxs[0] == 0:
			peak_idxs = peak_idxs[:-1]
			peak_vals = peak_vals[:-1]
		return peak_idxs, peak_vals
Ejemplo n.º 4
0
    def testSyntheticNoveltyCurve(self):
        # In this test we assume the noveltyCurve is a perfect sine. The
        # algorithm should reproduce a sinusoid with peaks at the same
        # positions as those from the fake novelty curve. Plotting
        # noveltyCurve against pool['sinusoid'] may help to understand.
        frameSize = 4  # 4 seconds
        overlap = 2
        frameRate = 44100 / 128.
        f = 10  # Hz
        f = f * float(frameRate) / nextPowerTwo(
            int(ceil(frameSize * frameRate)))
        expectedBpm = f * 60.  # 101Bpm

        # Half-wave rectified sine: negative samples are clamped to zero.
        length = 3000
        phase = 0.135 * numpy.pi
        noveltyCurve = []
        for x in range(length):
            sample = numpy.sin(2.0 * numpy.pi * f * x / frameRate + phase)
            noveltyCurve.append(0 if sample < 0 else sample)

        pool = self.computeBpmHistogram(noveltyCurve, frameSize, overlap,
                                        frameRate)

        noveltyPeaks = std.PeakDetection(interpolate=False)(noveltyCurve)[0]

        # Depending on the framesize, hopsize, etc. the sinusoid is usually
        # larger than the novelty curve, so we need to trim the sinusoid's
        # peaks.
        # NOTE(review): the novelty peaks use interpolate=False while the
        # sinusoid peaks use the detector defaults; the previous
        # interpolate=False pass over the sinusoid was overwritten before
        # use and has been removed - confirm which setting is intended.
        sinusoidPeaks = std.PeakDetection()(
            pool['sinusoid'])[0][:len(noveltyPeaks)]

        for p1, p2 in zip(noveltyPeaks, sinusoidPeaks):
            self.assertAlmostEqual(fabs(p1 - p2), 0, 5e-2)
        self.assertAlmostEqual(pool['bpm'], expectedBpm, 1e-3)
Ejemplo n.º 5
0
def mainFunction(feature, spec, varin):
    '''
    main procedure of algorithm

    Each of the first ``max_band`` feature columns is filtered, normalized
    and described by sliding-window Legendre fits. Peaks of the normalized
    first coefficient and peaks/valleys of the second one are passed to
    ``BE_change``; the per-band results are summed and handed to
    ``phoneBoundaryStep1`` (and optionally ``phoneBoundaryStep2``).

    :param feature: observation * features. NOTE: the first ``max_band``
                    columns are overwritten in place.
    :param spec: spectrogram, only read when ``varin['plot']`` is truthy
    :param varin: parameter mapping. Required keys: 'fs', 'framesize',
                  'hopsize', 'th_phone', 'q', 'k', 'delta', 'step2', 'plot',
                  'energy'. Optional keys with defaults: 'max_band' (8),
                  'l' (2), 'h0' (0.6), 'h1' (0.08), 'h2' (0.0725).
    :return: array of boundary times, i.e. boundaries * hopsize / fs
    '''

    fs = varin['fs']
    framesize = varin['framesize']
    hopsize = varin['hopsize']

    # Optional parameters fall back to their defaults when absent. The
    # commented lists are kept from the original source (presumably values
    # explored during tuning).
    max_band = varin.get('max_band', 8)
    # l = [2,4,6,8,10]
    l = varin.get('l', 2)
    h0 = varin.get('h0', 0.6)
    # h1 = [0.6,0.8,1.0]
    h1 = varin.get('h1', 0.08)
    # h2 = [0.5,0.6,0.7,0.8,0.9,1.0]
    h2 = varin.get('h2', 0.0725)
    th_phone = varin['th_phone']
    q = varin['q']
    k = varin['k']
    delta = varin['delta']
    step2 = varin['step2']
    plot = varin['plot']
    energy = varin['energy']

    M = 3  # legendre order
    PEAK = ess.PeakDetection(interpolate=False, maxPeaks=99999)
    T = feature.shape[0]  # time
    frame_axis = np.arange(T)
    frame_dur = hopsize / float(fs)
    legCoefs = np.zeros(shape=(T, M + 1))
    G = np.zeros(shape=(max_band, T))
    m_a0 = np.zeros(shape=(max_band, T))
    th_e = np.mean(energy) / 100

    # Curves of the last band that reached peak picking; only used by the
    # optional debug plot, which is skipped when no band got that far.
    a1 = None
    p_a1 = None
    a_a1 = None

    for ii in range(max_band):
        # triangular filtering
        feature[:, ii] = triangularFilter(feature[:, ii])
        # normalizing
        band_min = min(feature[:, ii])
        band_max = max(feature[:, ii])
        feature[:, ii] = (feature[:, ii] - band_min) / (band_max - band_min)
        # segmentation, fitting legendre polynomial
        for jj in range(l, T - l - 1):
            f = np.zeros(shape=(T, 1))
            seg = feature[jj - l:jj + l + 1, ii]
            seg = (seg - min(seg)) / (max(seg) - min(seg))
            seg = (seg - 0.5) * 2
            f[jj - l:jj + l + 1, 0] = seg
            # legendre coef
            coef = np.polynomial.legendre.legfit(frame_axis, f, M)
            legCoefs[jj, :] = coef.transpose()

        m_a0[ii, :] = legCoefs[:, 0]

        c1 = legCoefs[:, 1]
        c2 = legCoefs[:, 2]
        c1_min, c1_max = min(c1), max(c1)
        c2_min, c2_max = min(c2), max(c2)
        # A constant coefficient track cannot be normalized: skip the band.
        if c1_max == c1_min or c2_max == c2_min:
            continue
        a1 = (c1 - c1_min) / (c1_max - c1_min)
        a2 = (c2 - c2_min) / (c2_max - c2_min)

        # detect peaks of a1
        p_a1, a_a1 = PEAK(np.array(a1, dtype=np.float32))
        p_a1 = np.array(np.round(p_a1 * (T - 1)), dtype=int)

        # remove peaks that fall on silence or are too weak
        keep = np.array([not (energy[idx] < th_e or amp < h1)
                         for idx, amp in zip(p_a1, a_a1)], dtype=bool)
        p_a1 = p_a1[keep]
        a_a1 = a_a1[keep]

        # detect valleys of a2
        p_a2_v, a_a2_v = PEAK(np.array(1 - a2, dtype=np.float32))
        p_a2_v = np.array(np.round(p_a2_v * (T - 1)), dtype=int)

        # detect peaks of a2
        p_a2_p, a_a2_p = PEAK(np.array(a2, dtype=np.float32))
        p_a2_p = np.array(np.round(p_a2_p * (T - 1)), dtype=int)

        # BE change
        if len(p_a1) and len(p_a2_p):
            be_change_ii = BE_change(p_a1,
                                     p_a2_p,
                                     p_a2_v,
                                     feature[:, ii],
                                     h2=h2,
                                     frame_interval=5)
            G[ii, be_change_ii] = 1

    # equation 5
    g = np.sum(G, axis=0)

    phoneBoundary = phoneBoundaryStep1(g)

    if step2:
        d = phoneBoundaryStep2(phoneBoundary, m_a0, hopsize, fs, th_phone, q,
                               k, delta)

        p_d, a_d = PEAK(np.array(d[:, 0], dtype=np.float32))

        # NOTE(review): p_d is appended as returned by the peak detector,
        # without the (length - 1) scaling applied to the other peak
        # positions above - confirm that this is intended.
        for pos, amp in zip(p_d, a_d):
            if amp > h0:
                phoneBoundary.append(pos)

    phoneBoundary_time = np.array(phoneBoundary) * frame_dur

    if plot:
        if a1 is not None:
            plt.figure()
            plt.plot(a1)
            plt.stem(p_a1, a_a1)

            plt.show()

        # NOTE(review): N was referenced without being defined in this
        # function; 2 * framesize is assumed to be the FFT size of `spec` -
        # confirm against the code that computes the spectrogram.
        N = 2 * framesize
        mX = np.transpose(spec)
        maxplotfreq = 16001.0
        eps = np.finfo(float).eps
        mXPlot = mX[:int(N * (maxplotfreq / fs)) + 1, :]
        binFreqs = np.arange(mXPlot.shape[0]) * fs / float(N)
        timestamps = np.arange(mXPlot.shape[1]) * frame_dur

        plt.figure()
        plt.pcolormesh(timestamps, binFreqs, 20 * np.log10(mXPlot + eps))
        plt.title('Hoang')
        plt.xlabel('time(s)')
        plt.ylabel('freq')
        # A separate name is used here so the returned array is not
        # overwritten by the last plotted boundary.
        for boundary in phoneBoundary:
            plt.axvline(boundary * frame_dur)
        plt.show()

    return phoneBoundary_time
Ejemplo n.º 6
0
# matplotlib without any blocking GUI
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio

# Read the piano recording and cut an M-sample excerpt starting at `start`,
# scaled by the excerpt's maximum value.
(fs, x) = audio.read_wav('../../../sounds/piano.wav')
start = 13860
M = 800
xp = x[start:start + M] / float(max(x[start:start + M]))
# Autocorrelation of the excerpt, scaled so that its maximum is 1.
# NOTE(review): `ess` is not imported in the visible part of this script.
r = ess.AutoCorrelation(normalization='standard')(xp)
r = r / max(r)
# peaks[0] holds the peak positions and peaks[1] the peak values.
peaks = ess.PeakDetection(threshold=.11, interpolate=False, minPosition=.01)(r)

# Upper panel: the excerpt against time in seconds.
plt.figure(1, figsize=(9, 7))
plt.subplot(211)
plt.plot(np.arange(M) / float(fs), xp, lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(xp), max(xp)])
plt.xlabel('time (sec)')
plt.ylabel('amplitude')
plt.title('x (piano.wav)')

# Lower panel: the normalized autocorrelation on the same time axis.
plt.subplot(212)
plt.plot(np.arange(M) / float(fs), r, 'r', lw=1.5)
plt.plot(peaks[0] * (M - 1) / float(fs),
         peaks[1],
         'x',
         color='k',
Ejemplo n.º 7
0
def plot(pool, title, outputfile='out.svg', subplot=111):
    ''' plots bars for each beat

    Thresholds the per-band loudness ratios, autocorrelates every band and
    draws five stacked panels (band autocorrelations, their sum, the
    harmonics histogram, the sum again scaled by value, and the weighted
    histogram) before saving the figure.

    :param pool: mapping providing 'loudness' (one value per tick) and
                 'loudnessBandRatio' (ticks x bands, needs ``transpose``)
    :param title: not used by this function
    :param outputfile: path handed to ``pyplot.savefig``
    :param subplot: not used by this function
    :return: None
    '''

    barSize = 0.8

    def draw_bars(values, unit_height=False, fade=True):
        # One bar per entry. With `fade` the opacity is the value relative
        # to the maximum, clipped to [0, 1]; with `unit_height` every bar
        # has height 1 instead of its value.
        maxAlpha = max(values) if fade else None
        for i, val in enumerate(values):
            style = {}
            if fade:
                style['alpha'] = max(0, min(val / maxAlpha, 1))
            pyplot.bar(i,
                       1 if unit_height else val,
                       barSize,
                       align='edge',
                       bottom=0,
                       color='r',
                       edgecolor='w',
                       linewidth=.3,
                       **style)

    loudness = pool['loudness']
    loudnessBand = pool['loudnessBandRatio']  # ticks x bands

    medianRatiosPerTick = [median(energy) for energy in loudnessBand]
    meanRatiosPerTick = [mean(energy) for energy in loudnessBand]

    loudnessBand = copy.deepcopy(loudnessBand.transpose())  # bands x ticks

    # Median / mean of every band over a window of ticks starting at each
    # tick; windows running past the end are moved back and shortened.
    nticks = len(loudness)
    medianRatiosPerBand = []
    meanRatiosPerBand = []
    for band in loudnessBand:
        medians = [0] * nticks
        means = [0] * nticks
        for idxTick in range(nticks):
            start = idxTick
            end = start + BEATWINDOW
            if end > nticks:
                howmuch = end - nticks
                end = nticks - 1
                start = max(end - howmuch, 0)
            window = band[start:end]
            medians[idxTick] = median(window)
            means[idxTick] = mean(window)
        medianRatiosPerBand.append(medians)
        meanRatiosPerBand.append(means)

    # Zero the ratios that are below both the band and the tick threshold;
    # weight the remaining ones by the tick loudness.
    for iBand, band in enumerate(loudnessBand):
        for tick, ratio in enumerate(band):
            bandThreshold = max(medianRatiosPerBand[iBand][tick],
                                meanRatiosPerBand[iBand][tick])
            tickThreshold = max(medianRatiosPerTick[tick],
                                meanRatiosPerTick[tick])
            if ratio < bandThreshold and ratio <= tickThreshold:
                loudnessBand[iBand][tick] = 0
            else:
                loudnessBand[iBand][tick] *= loudness[tick]

    acorr = std.AutoCorrelation()
    bandCorr = [acorr(essentia.array(band)) for band in loudnessBand]
    # Lags 0 and 1 are ignored when looking for the strongest correlation.
    maxCorr = [argmax(corr[2:]) + 2 for corr in bandCorr]

    # use as much window space as possible:
    pyplot.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95)

    pyplot.subplot(511)
    pyplot.imshow(bandCorr,
                  cmap=pyplot.cm.hot,
                  aspect='auto',
                  origin='lower',
                  interpolation='nearest')
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print('max correlation %s' % (maxCorr,))

    sumCorr = [sum(corr[tick] for corr in bandCorr)
               for tick in range(nticks)]
    sumCorr[0] = 0
    sumCorr[1] = 0

    pyplot.subplot(512)
    draw_bars(sumCorr, unit_height=True)

    print('max sum correlation %s' % (argmax(sumCorr[2:]) + 2,))

    hist = getHarmonics(sumCorr)
    maxHist = argmax(hist)
    print('max histogram %s' % (maxHist,))

    pyplot.subplot(513)
    draw_bars(hist, fade=False)

    peakDetect = std.PeakDetection(maxPeaks=5,
                                   orderBy='amplitude',
                                   minPosition=0,
                                   maxPosition=len(sumCorr) - 1,
                                   range=len(sumCorr) - 1)
    peaks = peakDetect(sumCorr)[0]
    peaks = [round(x + 1e-15) for x in peaks]
    print('Peaks: %s' % (peaks,))

    pyplot.subplot(514)
    draw_bars(sumCorr)

    # multiply both histogram and sum corr to have a weighted histogram:
    wHist = essentia.array(hist) * sumCorr * acorr(loudness)
    maxHist = argmax(wHist)
    print('max weighted histogram %s' % (maxHist,))

    pyplot.subplot(515)
    draw_bars(wHist)

    pyplot.savefig(outputfile, dpi=300)
    return
Ejemplo n.º 8
0
def mainFunction(feature,spec,varin):

    '''
    main procedure of algorithm

    Candidate boundaries are the peaks of the spectral variation function
    (norm of the delta MFCC), pruned by ``heuristics`` and then validated
    one by one with ``BIC`` on the frames of ``feature`` around them.

    :param feature: observation * features
    :param spec: spectrogram, only read when ``varin['plot']`` is truthy
    :param varin: parameter mapping. Required keys: 'fs', 'framesize',
                  'hopsize', 'h2', 'alpha', 'p_lambda', 'mode_bic', 'plot',
                  'feature_select' (and 'mfcc' unless 'feature_select' is
                  'mfcc'). Optional key: 'winmax' (default 0.35).
    :return: times of the retained boundaries, i.e. frame * hopsize / fs
    '''

    fs = varin['fs']
    framesize = varin['framesize']
    hopsize = varin['hopsize']
    # The commented lists are kept from the original source (presumably
    # values explored during tuning).
    # h2 = [0.0,0.02,0.04,0.06,0.08,0.1]
    h2 = varin['h2']
    # alpha = [0.2,0.4,0.6,0.8,1.0]
    alpha = varin['alpha']
    # p_lambda = [0.2,0.4,0.6,0.8,1.0] mode_bic = BIC
    p_lambda = varin['p_lambda']
    mode_bic = varin['mode_bic']
    winmax = varin.get('winmax', 0.35)
    plot = varin['plot']

    if varin['feature_select'] == 'mfcc':
        mfcc = feature
    else:
        mfcc = varin['mfcc']

    T = mfcc.shape[0]                                   # time
    frame_dur = hopsize / float(fs)
    winmax_frame = int(np.round(winmax * fs / hopsize))
    PEAK = ess.PeakDetection(interpolate=False, maxPeaks=99999)

    # Parenthesized single-string print behaves the same on Python 2 and 3.
    print('calculating delta mfcc ... ...')

    d_mfcc = Fdeltas(mfcc.transpose(), w=9)
    d_mfcc = np.transpose(d_mfcc)

    # Spectral variation function
    SVF = np.sqrt(np.sum(d_mfcc**2.0, axis=1))
    SVF = (SVF - np.min(SVF)) / (np.max(SVF) - np.min(SVF))

    # peaks and valleys
    p_SVF, a_SVF = PEAK(np.array(SVF, dtype=np.float32))
    p_SVF = np.array(np.round(p_SVF * (T - 1)), dtype=int)

    p_v_SVF, a_v_SVF = PEAK(np.array(1 - SVF, dtype=np.float32))
    p_v_SVF = np.array(np.round(p_v_SVF * (T - 1)), dtype=int)

    # heuristics
    p_SVF, a_SVF, p_v_SVF, a_v_SVF = heuristics(p_SVF, a_SVF, p_v_SVF,
                                                a_v_SVF, SVF, fs, hopsize,
                                                h2, alpha)

    index2Delete = []
    if len(p_SVF) > 3:
        # dynamic windowing BIC: `anchor` is the index of the last boundary
        # that was kept; rejected boundaries do not move it, so the left
        # window keeps growing until a boundary is accepted.
        anchor = 0
        for ii in range(1, len(p_SVF) - 1):
            p_0 = p_SVF[anchor]
            p_1 = p_SVF[ii]
            p_2 = p_SVF[ii + 1]
            # cap the left window at winmax_frame frames
            if p_1 - p_0 > winmax_frame:
                p_0 = p_1 - winmax_frame

            delta_BIC = BIC(feature[p_0:p_1, :],
                            feature[p_1:p_2, :],
                            feature[p_0:p_2, :],
                            p_lambda,
                            mode=mode_bic,
                            shrinkage=2)

            if delta_BIC > 0:
                anchor = ii
            else:
                index2Delete.append(ii)

    p_BIC = np.delete(p_SVF, index2Delete)
    a_BIC = np.delete(a_SVF, index2Delete)

    timestamps_p_BIC = p_BIC * frame_dur

    # plot
    if plot:
        N = 2 * framesize
        mX = np.transpose(spec)
        maxplotfreq = 6001.0
        eps = np.finfo(float).eps
        mXPlot = mX[:int(N * (maxplotfreq / fs)) + 1, :]
        binFreqs = np.arange(mXPlot.shape[0]) * fs / float(N)
        timestamps_spec = np.arange(mXPlot.shape[1]) * frame_dur

        timestamps = np.arange(T) * frame_dur
        timestamps_p_SVF = p_SVF * frame_dur

        fig, axarr = plt.subplots(3, sharex=True)

        axarr[0].plot(timestamps, SVF)
        axarr[0].stem(timestamps_p_SVF, a_SVF)
        axarr[0].set_ylabel('SVF')
        axarr[0].set_title('boundary heuristics')

        axarr[1].plot(timestamps, SVF)
        axarr[1].stem(timestamps_p_BIC, a_BIC)
        axarr[1].set_title('boundary DISTBIC')

        axarr[2].pcolormesh(timestamps_spec, binFreqs,
                            20 * np.log10(mXPlot + eps))
        axarr[2].set_title('spectrogram')
        for boundary_time in timestamps_p_BIC:
            axarr[2].axvline(boundary_time)

        plt.show()

    return timestamps_p_BIC
Ejemplo n.º 9
0
# Analysis options.
polyphony = True
three_chords = True
# Profile names listed by the original author: diatonic, krumhansl,
# temperley, weichai, tonictriad, temperley2005, thpcp, faraldo
profile = 'krumhansl'

# INSTANTIATE ALGORITHMS
#=======================
# Every algorithm below is configured from names (audiofile, sample_rate,
# window_size, ...) that must be defined before this point.
loader = estd.MonoLoader(
    filename=audiofile,  # the comma missing here was a SyntaxError
    sampleRate=sample_rate)
window = estd.Windowing(
    size=window_size)
rfft = estd.Spectrum(
    size=window_size)
# Peak positions are limited to [min_frequency, max_frequency], expressed as
# fractions of `nyquist`.
peaks = estd.PeakDetection(
    interpolate=interpolate,
    threshold=threshold,
    minPosition=(min_frequency/nyquist),
    maxPosition=(max_frequency/nyquist),
    maxPeaks=maxPeaks)
hpcp = estd.HPCP(
    bandPreset=band_preset,
    harmonics=harmonics,
    minFrequency=min_frequency,
    maxFrequency=max_frequency,
    nonLinear=non_linear,
    normalized=normalize,
    sampleRate=sample_rate,
    referenceFrequency=reference_frequency,
    weightType=weight_type,
    windowSize=weight_window_size)
key = estd.Key(
    numHarmonics=harmonics_key,
def mainFunction(filename,fs,framesize,hopsize,h2,alpha,p_lambda):

    '''
    main procedure of algorithm

    Loads the audio, computes frame-wise MFCCs and their deltas, picks the
    peaks of the spectral variation function as candidate boundaries, prunes
    them with ``heuristics`` and validates them one by one with ``ABF2``.

    :param filename: path of the audio file to load
    :param fs: sample rate used to load the audio
    :param framesize: analysis frame size
    :param hopsize: hop size between analysis frames
    :param h2: passed through to ``heuristics``
    :param alpha: passed through to ``heuristics``
    :param p_lambda: passed through to ``ABF2``
    :return: nothing in the visible definition.
             NOTE(review): ``p_ABF2`` / ``a_ABF2`` are computed but never
             returned; the function looks truncated - confirm against the
             original file.
    '''

    # load audio
    audio = ess.MonoLoader(filename=filename, sampleRate=fs)()

    # spectrogram init
    winAnalysis = 'hann'
    N = 2 * framesize                       # padding 1 time framesize
    SPECTRUM = ess.Spectrum(size=N)
    WINDOW = ess.Windowing(type=winAnalysis, zeroPadding=N - framesize)
    highFrequencyBound = min(fs / 2, 11000)
    MFCC = ess.MFCC(sampleRate=fs, highFrequencyBound=highFrequencyBound)
    PEAK = ess.PeakDetection(interpolate=False, maxPeaks=99999)
    mfcc = []
    mX = []

    # Parenthesized single-string print behaves the same on Python 2 and 3.
    print('calculating MFCC ... ...')

    for frame in ess.FrameGenerator(audio, frameSize=framesize,
                                    hopSize=hopsize):
        mXFrame = SPECTRUM(WINDOW(frame))
        mX.append(mXFrame)
        bands, mfccFrame = MFCC(mXFrame)
        # the first coefficient is dropped
        mfcc.append(mfccFrame[1:])

    # spectrogram; not used further in the visible part of the function
    mX = np.transpose(np.array(mX))
    mfcc = np.array(mfcc)
    T = mfcc.shape[0]                       # time

    print('calculating delta mfcc ... ...')

    d_mfcc = Fdeltas(mfcc.transpose(), w=9)
    d_mfcc = np.transpose(d_mfcc)

    # Spectral variation function
    SVF = np.sqrt(np.sum(d_mfcc**2.0, axis=1))
    SVF = (SVF - np.min(SVF)) / (np.max(SVF) - np.min(SVF))

    # peaks and valleys
    p_SVF, a_SVF = PEAK(np.array(SVF, dtype=np.float32))
    p_SVF = np.array(np.round(p_SVF * (T - 1)), dtype=int)

    p_v_SVF, a_v_SVF = PEAK(np.array(1 - SVF, dtype=np.float32))
    p_v_SVF = np.array(np.round(p_v_SVF * (T - 1)), dtype=int)

    # heuristics
    p_SVF, a_SVF, p_v_SVF, a_v_SVF = heuristics(p_SVF, a_SVF, p_v_SVF,
                                                a_v_SVF, SVF, fs, hopsize,
                                                h2, alpha)

    index2Delete = []
    if len(p_SVF) > 3:
        # dynamic windowing: `anchor` is the index of the last boundary that
        # was kept; rejected boundaries do not move it, so the left window
        # keeps growing until a boundary is accepted.
        anchor = 0
        for ii in range(1, len(p_SVF) - 1):
            p_0 = p_SVF[anchor]
            p_1 = p_SVF[ii]
            p_2 = p_SVF[ii + 1]

            delta_ABF2 = ABF2(d_mfcc[p_0:p_1, :],
                              d_mfcc[p_1:p_2, :],
                              d_mfcc[p_0:p_2, :],
                              p_lambda)
            if delta_ABF2 > 0:
                anchor = ii
            else:
                index2Delete.append(ii)

    p_ABF2 = np.delete(p_SVF, index2Delete)
    a_ABF2 = np.delete(a_SVF, index2Delete)
import numpy as np
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.externals import joblib
from sklearn.cross_validation import train_test_split, StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from vuvAlgos import featureVUV, consonantInterval
from scipy.spatial.distance import pdist, squareform

import matplotlib.pyplot as plt
import essentia.standard as ess

# Module-level Essentia peak detector, configured with maxPeaks=10.
PEAK = ess.PeakDetection(maxPeaks=10)


def boundaryFrame(phoSeg, varin):

    # boundary frame from phoneme segmentation, [[pho_start,pho_end],[pho_s,pho_e],...]
    boundary = [0.0]
    start_time_syllable = phoSeg[0][0]
    if len(phoSeg) > 1:
        for ii in range(len(phoSeg) - 1):
            end_frame = int(
                round((phoSeg[ii][1] - start_time_syllable) * varin['fs'] /
                      varin['hopsize']))
            boundary.append(end_frame)
    end_frame_syllable = int(
        round((phoSeg[-1][1] - start_time_syllable) * varin['fs'] /
Ejemplo n.º 12
0
def vibrato(pitch):
    """Detect vibrato sections in a pitch track.

    The track is split into segments of non-zero pitch. Each segment is
    analysed in half-overlapping frames: a frame counts as vibrato when the
    strongest peak of its spectrum lies between minFreq and maxFreq and the
    second / third peaks are outside that range and sufficiently weaker.
    Detected frames are post-processed by ``vibratoSectionFilter``.

    :param pitch: sequence of pitch values, one per analysis frame; negative
                  values are treated as 0 (no pitch). The frame rate is
                  hard-coded to 44100 / 256 frames per second.
    :return: tuple (vibB, vibBExt, vibBFreq, vibBFreqMin, vibBFreqMax).
             ``vibB`` is a flat list of alternating start / end indices of
             the vibrato sections; the other lists hold, per section, the
             frame-wise extent, peak frequency, minimum and maximum.
             All five lists are empty for an empty pitch track.
    :raises AssertionError: if the number of boundaries is odd
    """
    if len(pitch) == 0:
        return [], [], [], [], []

    # True division: the Python 2 integer division truncated the rate to 172.
    sampleRate = 44100 / 256.0
    # half-second frames, shortened for tracks shorter than that
    frameSize = min(int(round(0.5 * sampleRate)), len(pitch))
    halfFrame = frameSize // 2
    hop = max(1, halfFrame)  # range() rejects a zero step
    fftSize = 4 * frameSize
    dBLobe = 15
    dBSecondLobe = 20
    minFreq = 2.0
    maxFreq = 8.0
    winAnalysis = 'hann'

    # INIT: negative pitch values mean "no pitch"
    f0 = [0 if f < 0 else f for f in pitch]

    # GET CONTOUR SEGMENTS
    startC = []
    endC = []
    if f0[0] > 0:
        startC.append(0)

    for ii in range(len(f0) - 1):
        if abs(f0[ii + 1]) > 0 and f0[ii] == 0:
            startC.append(ii + 1)
        if f0[ii + 1] == 0 and abs(f0[ii]) > 0:
            endC.append(ii)

    if len(endC) < len(startC):
        endC.append(len(f0))

    WINDOW = ess.Windowing(type=winAnalysis, size=frameSize,
                           zeroPadding=fftSize - frameSize)
    SPECTRUM = ess.Spectrum(size=fftSize)
    SMOOTH = ess.MovingAverage(size=5)
    PEAKS = ess.PeakDetection(maxPeaks=3, orderBy='amplitude')

    # vibrato annotations
    vibSec = [0] * len(f0)
    vibFreq = [0] * len(f0)
    vibFreqMin = [0] * len(f0)
    vibFreqMax = [0] * len(f0)
    vibExt = [0] * len(f0)

    vibBRaw = []
    vibFreqMinRaw = []
    vibFreqMaxRaw = []

    # ANALYSE EACH SEGMENT
    for segStart, segEnd in zip(startC, endC):
        contour = f0[segStart:segEnd]
        # frame-wise FFT
        for jj in range(0, len(contour) - frameSize, hop):
            raw = contour[jj:jj + frameSize]
            minFrame = min(raw)
            maxFrame = max(raw)
            extend = maxFrame - minFrame

            # single precision array for the Essentia algorithms
            frame = np.asarray(raw, dtype=np.float32)
            frame = frame - np.mean(frame)
            frame = WINDOW(SMOOTH(frame))

            S = SPECTRUM(frame)
            locs, amp = PEAKS(S)
            freqs = locs * (fftSize // 2 + 1) * sampleRate / fftSize

            if len(freqs) <= 0:
                continue
            # strongest peak is not in considered range
            if freqs[0] < minFreq or freqs[0] > maxFreq:
                continue
            if len(freqs) > 1:  # there is a second peak
                if minFreq < freqs[1] < maxFreq:
                    continue
                if 20 * np.log10(amp[0] / amp[1]) < dBLobe:
                    continue
            if len(freqs) > 2:  # there is a third peak
                # it is also in the vibrato range
                if minFreq < freqs[2] < maxFreq:
                    continue
                if 20 * np.log10(amp[0] / amp[2]) < dBSecondLobe:
                    continue

            lo = segStart + jj
            hi = lo + frameSize
            vibSec[lo:hi] = f0[lo:hi]
            vibExt[lo:hi] = [extend] * frameSize
            vibFreq[lo:hi] = [freqs[0]] * frameSize
            vibFreqMin[lo:hi] = [minFrame] * frameSize
            vibFreqMax[lo:hi] = [maxFrame] * frameSize

            vibBRaw.append((lo, hi))
            vibFreqMinRaw.append(minFrame)
            vibFreqMaxRaw.append(maxFrame)

    # section filter
    toRemove = vibratoSectionFilter(vibBRaw, vibFreqMinRaw, vibFreqMaxRaw)
    for tr in toRemove:
        start = vibBRaw[tr][0]
        end = vibBRaw[tr][1] - halfFrame
        # The replacement has exactly the length of the slice, so the
        # annotation lists keep their length even for odd frame sizes.
        blank = [0] * (end - start)
        vibSec[start:end] = blank
        vibExt[start:end] = blank
        vibFreq[start:end] = blank
        vibFreqMin[start:end] = blank
        vibFreqMax[start:end] = blank

    # write vibrato sections boundary
    vibB = []
    vibBExt = []
    vibBFreq = []
    vibBFreqMin = []
    vibBFreqMax = []
    if vibSec[0] > 0:
        vibB.append(0)
    for ii in range(len(f0) - 1):
        if abs(vibSec[ii + 1]) > 0 and vibSec[ii] == 0:
            vibB.append(ii + 1)
        if vibSec[ii + 1] == 0 and vibSec[ii] > 0:
            vibB.append(ii)
    if vibSec[-1] > 0:
        # Close a section that runs to the end of the track. The original
        # appended to endC here, leaving vibB with an odd length.
        vibB.append(len(f0) - 1)

    assert (len(vibB) % 2 == 0), "vib boundary should be even!"

    for ii in range(len(vibB) // 2):
        first = vibB[2 * ii]
        last = vibB[2 * ii + 1] + 1
        vibBExt.append(vibExt[first:last])
        vibBFreq.append(vibFreq[first:last])
        vibBFreqMin.append(vibFreqMin[first:last])
        vibBFreqMax.append(vibFreqMax[first:last])
    return vibB, vibBExt, vibBFreq, vibBFreqMin, vibBFreqMax