import numpy as np
import essentia.standard as es


def get_lpc(frames, sample_rate=16000, num_coeffs=32, window_type='hann'):
    '''
    Calculates linear prediction coefficients for each frame.

    Parameters:
    frames          : overlapping signal frames for short-time analysis
    sample_rate     : audio sampling rate in Hz
    num_coeffs      : number of linear prediction coefficients per frame
    window_type     : type of windowing function to apply before LPC analysis

    Returns two 2D numpy arrays (LPC coefficients and reflection coefficients),
    each with one column per frame.
    '''
    frame_size = len(frames[0])
    lpc_coeffs = []
    reflection_coeffs = []

    # order = num_coeffs - 1, so Essentia returns num_coeffs LPC coefficients per frame
    lpc_estimator = es.LPC(sampleRate=sample_rate, order=num_coeffs - 1)
    windowing = es.Windowing(type=window_type, size=frame_size)

    for frame in frames:
        lpc, reflection = lpc_estimator(windowing(frame) * 1000)
        lpc_coeffs.append(lpc)
        reflection_coeffs.append(reflection)

    return np.array(lpc_coeffs).T, np.array(reflection_coeffs).T
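A minimal usage sketch for get_lpc, assuming essentia.standard as es and numpy as np (as the function body implies); the audio filename is hypothetical:

import numpy as np
import essentia.standard as es

audio = es.MonoLoader(filename='speech.wav', sampleRate=16000)()  # hypothetical file
frames = list(es.FrameGenerator(audio, frameSize=512, hopSize=256, startFromZero=True))

lpcs, reflections = get_lpc(frames, sample_rate=16000, num_coeffs=32)
print(lpcs.shape)  # (num_coeffs, n_frames) after the transpose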
Example #2
    def compute(self, *args):
        x = args[1]
        order = 12
        LPC = es.LPC(order=order, type='regular')
        idx_ = 0
        threshold = 10
        powerEstimationThreshold = 10
        silenceThreshold = db2pow(-50)
        detectionThreshold = db2pow(30)

        # frameSize and hopSize are module-level parameters in the original test;
        # start_proc/end_proc select the central hop of each frame for detection.
        start_proc = int(frameSize / 2 - hopSize / 2)
        end_proc = int(frameSize / 2 + hopSize / 2)

        y = []
        for frame in es.FrameGenerator(x,
                                       frameSize=frameSize,
                                       hopSize=hopSize,
                                       startFromZero=True):
            if instantPower(frame) < silenceThreshold:
                idx_ += 1
                continue

            lpc, _ = LPC(frame)

            lpc /= np.max(lpc)

            # FIR-filter the frame with the (max-normalized) LPC polynomial to get a prediction error signal
            e = es.IIR(numerator=lpc)(frame)

            # Matched-filter the error: filter the reversed signal, then reverse back (zero phase)
            e_mf = es.IIR(numerator=-lpc)(e[::-1])[::-1]

            # Thresholding
            th_p = np.max([self.robustPower(e, powerEstimationThreshold) *\
                           detectionThreshold, silenceThreshold])

            detections = [i + start_proc for i, v in\
                          enumerate(e_mf[start_proc:end_proc]**2) if v >= th_p]
            if detections:
                starts = [detections[0]]
                ends = []
                end = detections[0]
                for idx, d in enumerate(detections[1:], 1):
                    if d == detections[idx - 1] + 1:
                        end = d
                    else:
                        ends.append(end)
                        starts.append(d)
                        end = d
                ends.append(end)

                for start in starts:
                    y.append((start + idx_ * hopSize) / 44100.)

                # for end in ends:
                #     y.append((end + idx_ * hopSize) / 44100.)

            idx_ += 1

        return esarr(y)
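The core of the detector above is LPC whitening: filtering each frame with its own prediction polynomial A(z) leaves a residual in which clicks stand out. A minimal, self-contained sketch of that step using SciPy's lfilter for the filtering; the helper name and default order are illustrative, not from the original:

import numpy as np
from scipy.signal import lfilter
import essentia.standard as es

def lpc_residual(frame, order=12):
    """Prediction error e[n] = A(z) x[n] for one frame (illustrative helper)."""
    frame = np.asarray(frame, dtype=np.float32)
    lpc, _ = es.LPC(order=order, type='regular')(frame)
    # Applying A(z) as an FIR filter whitens the frame; large residual samples
    # indicate content the short-term predictor cannot explain (e.g. clicks).
    return lfilter(lpc, [1.0], frame)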
Example #3
import numpy as np
from scipy.signal import lfilter, freqz
import essentia.standard as ess


def lpc_envelope(signal_inp, M, fs, freq_size):
    """
	Returns the Spectral Envelope based on the LPC method

	Finds the spectral envelope by finding the frequency response of an IIR filter with coefficients as the lp coefficients

	Parameters
	----------
	signal_inp : np.array
	    numpy array containing the audio signal
	M : integer
	    LPC coefficients order
    fs : float
    	Sampling Rate
	freq_size : integer
		Size of the output frequency envelope

	Returns
	-------
	spectral_envelope : np.array
	    Returns the spectral envelope

	References
	----------
	.. [1] Cross Synthesis Using Cepstral Smoothing or Linear Prediction for Spectral Envelopes, J.O. Smith
	       https://ccrma.stanf2000ord.edu/~jos/SpecEnv/LPC_Envelope_Example_Speech.html

	"""
    # LPC coefficients via Essentia's LPC algorithm (returns (lpc, reflection))
    lpc_coeffs = ess.LPC(order=M, sampleRate=fs)(signal_inp)

    # Gain of the all-pole filter, estimated from the prediction residual
    res_e = lfilter(b=lpc_coeffs[0], a=1, x=signal_inp)
    G = np.linalg.norm(res_e)

    # Frequency response of the all-pole filter G / A(z), with the LP coefficients as its denominator
    w, h = freqz(b=G, a=lpc_coeffs[0], worN=freq_size, whole=True)

    # log transform the above
    spectral_envelope = 20 * np.log10(np.abs(h)[0:freq_size // 2 + 1])

    #zero mean
    # spectral_envelope = spectral_envelope - np.mean(spectral_envelope)

    return spectral_envelope
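A quick way to check lpc_envelope is to overlay it on the log-magnitude spectrum of the same excerpt. A sketch under the assumption that essentia.standard is available as ess; the audio path and analysis size are illustrative:

import numpy as np
import matplotlib.pyplot as plt
import essentia.standard as ess

fs = 44100.0
N = 2048
x = ess.MonoLoader(filename='voice.wav', sampleRate=int(fs))()  # hypothetical file
frame = x[:N]  # analyse the first N samples

env = lpc_envelope(frame, M=20, fs=fs, freq_size=N)  # N // 2 + 1 points
mX = 20 * np.log10(np.abs(np.fft.rfft(frame * np.hamming(N))) + 1e-12)

freqs = np.arange(N // 2 + 1) * fs / N
plt.plot(freqs, mX, label='spectrum (dB)')
plt.plot(freqs, env, label='LPC envelope (dB)')
plt.legend()
plt.show()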
Example #4
import essentia.standard as ess
from scipy.fftpack import fft  # assumed import; numpy.fft.fft would work the same way


def lpcEnvelope(audioSamples, npts, order):
    '''npts is an even number'''
    lpc = ess.LPC(order=order)
    lpcCoeffs = lpc(audioSamples)
    frequencyResponse = fft(lpcCoeffs[0], npts)
    return frequencyResponse[:npts // 2]  # integer division keeps the slice index an int
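Since lpcEnvelope returns the (half) complex frequency response of the prediction polynomial A(z), a dB envelope is just the negated log magnitude. A minimal sketch with a synthetic stand-in frame:

import numpy as np

frame = np.random.randn(1024).astype(np.float32)  # stand-in for a real audio frame
resp = lpcEnvelope(frame, npts=512, order=12)
envelope_db = -20 * np.log10(np.abs(resp) + 1e-12)  # 1 / |A(e^jw)| in dB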
Example #5
def compute(audio, pool, options):
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = ess.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = ess.ZeroCrossingRate()

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = ess.BarkBands(sampleRate=sampleRate)
    centralmoments = ess.CentralMoments()
    crest = ess.Crest()
    centroid = ess.Centroid()
    decrease = ess.Decrease()
    spectral_contrast = ess.SpectralContrast(frameSize=frameSize,
                                             sampleRate=sampleRate,
                                             numberBands=6,
                                             lowFrequencyBound=20,
                                             highFrequencyBound=11000,
                                             neighbourRatio=0.4,
                                             staticDistribution=0.15)
    distributionshape = ess.DistributionShape()
    energy = ess.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = ess.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = ess.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0, sampleRate=sampleRate)
    energyband_middle_high = ess.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0,
                                            sampleRate=sampleRate)
    energyband_high = ess.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0, sampleRate=sampleRate)
    flatnessdb = ess.FlatnessDB()
    flux = ess.Flux()
    harmonic_peaks = ess.HarmonicPeaks()
    hfc = ess.HFC()
    mfcc = ess.MFCC()
    rolloff = ess.RollOff()
    rms = ess.RMS()
    strongpeak = ess.StrongPeak()

    # pitch algorithms
    pitch_detection = ess.PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = ess.PitchSalience()

    # dissonance
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = ess.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total=total_frames)

    #scPool = es.Pool()  # pool for spectral contrast

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        # silence rate
        # pool.add(namespace + '.' + 'silence_rate_60dB', es.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_60dB', is_silent_threshold(frame, -60))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        #scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        #scPool.add(namespace + '.' + 'scvalleys', sc_valleys)
        pool.add(namespace + '.' + 'spectral_contrast', sc_coeffs)


        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = ess.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(
            barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if no 'zerocrossingrate' descriptor was added, every frame was silent
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise ess.EssentiaError('This is a silent file!')

    #spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            # bincount() needs non-negative integer bins, so round to the nearest MIDI note
            midipitches.append(int(round(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)))
        else:
            unknown += 1

    if len(midipitches) > 0:
        # compute histogram
        midipitchhist = bincount(midipitches)
        # set 0 midi pitch to be the number of pruned value
        midipitchhist[0] = unknown
        # normalise
        midipitchhist = [val / float(sum(midipitchhist)) for val in midipitchhist]
        # zero pad
        for i in range(128 - len(midipitchhist)): midipitchhist.append(0.0)
    else:
        midipitchhist = [0.] * 128
        midipitchhist[0] = 1.

    # pitchhist = ess.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)  # , pool.GlobalScope)

    # the code below is the same as the one above:
    # for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = ess.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(
        pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)  # , pool.GlobalScope)

    progress.finish()
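A plausible driver for compute() above (not from the original source): fill an essentia Pool, aggregate the per-frame descriptors into summary statistics, and write them out. The audio path, option values and output filename are assumptions, and the module-level helpers used above (namespace, INFO, Progress, is_silent_threshold) are taken to be available.

import essentia
import essentia.standard as ess

options = {'sampleRate': 44100, 'frameSize': 2048, 'hopSize': 1024,
           'windowType': 'blackmanharris62', 'skipSilence': True}  # illustrative values
audio = ess.MonoLoader(filename='track.wav', sampleRate=options['sampleRate'])()  # hypothetical path

pool = essentia.Pool()
compute(audio, pool, options)

# Aggregate per-frame descriptors into summary statistics and write them to disk
aggregated = ess.PoolAggregator(defaultStats=['mean', 'var'])(pool)
ess.YamlOutput(filename='lowlevel.yaml')(aggregated)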
Example #6
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, hanning, triang, blackmanharris, resample
import math
import sys, os, time
from scipy.fftpack import fft, ifft
import essentia.standard as ess

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))
import utilFunctions as UF

lpc = ess.LPC(order=14)
N = 512
(fs, x) = UF.wavread('../../../sounds/soprano-E4.wav')
first = 20000
last = first + N
x1 = x[first:last]
X = fft(hamming(N) * x1)
mX = 20 * np.log10(abs(X[:N // 2]))

coeff = lpc(x1)
Y = fft(coeff[0], N)
mY = 20 * np.log10(abs(Y[:N // 2]))

plt.figure(1, figsize=(9, 5))
plt.subplot(2, 1, 1)
plt.plot(np.arange(first, last) / float(fs), x[first:last], 'b', lw=1.5)
plt.axis([
    first / float(fs), last / float(fs),
    min(x1), max(x1)])
# (the original listing is truncated here; the y-limits above are assumed from the plotted segment)
Example #7
    def compute(self, *args):
        x = args[1]
        # order, frame_size, hop_size, times_thld, kernel_size, sub_frame and
        # energy_thld are module-level parameters in the original test module.
        LPC = es.LPC(order=order, type='regular')
        W = es.Windowing(size=frame_size, zeroPhase=False, type='triangular')
        predicted = np.zeros(hop_size)
        y = []
        self.frames = []
        self.errors = []
        self.errors_filt = []
        self.samples_peaking_frame = []
        self.frame_idx = []
        self.power = []
        frame_counter = 0

        for frame in es.FrameGenerator(x,
                                       frameSize=frame_size,
                                       hopSize=hop_size,
                                       startFromZero=True):
            self.power.append(es.essentia.instantPower(frame))
            self.frames.append(frame)
            frame_un = np.array(frame[hop_size // 2:hop_size * 3 // 2])
            frame = W(frame)
            norm = np.max(np.abs(frame))
            if not norm:
                continue
            frame /= norm

            lpc_f, _ = LPC(esarray(frame))

            lpc_f1 = lpc_f[1:][::-1]  # predictor coefficients a_1..a_order, reversed (leading coefficient skipped)

            # Forward linear prediction over the central hop:
            # x_hat[n] = -sum_k a_k * x[n - k]
            for idx, i in enumerate(range(hop_size // 2, hop_size * 3 // 2)):
                predicted[idx] = -np.sum(
                    np.multiply(frame[i - order:i], lpc_f1))

            error = np.abs(frame[hop_size // 2:hop_size * 3 // 2] - predicted)

            threshold1 = times_thld * np.std(error)

            med_filter = medfilt(error, kernel_size=kernel_size)
            filtered = np.abs(med_filter - error)

            mask = []
            for i in range(0, len(error), sub_frame):
                r = es.essentia.instantPower(
                    frame_un[i:i + sub_frame]) > energy_thld
                mask += [r] * sub_frame
            mask = mask[:len(error)]
            mask = np.array([mask]).astype(float)[0]

            if sum(mask) == 0:
                threshold2 = 1000  # just skip silent frames
            else:
                threshold2 = times_thld * (np.std(error[mask.astype(bool)]) +
                                           np.median(error[mask.astype(bool)]))

            threshold = np.max([threshold1, threshold2])

            samples_peaking = np.sum(filtered >= threshold)
            if samples_peaking >= 1:
                y.append(frame_counter * hop_size / 44100.)
                self.frame_idx.append(frame_counter)

            self.frames.append(frame)
            self.errors.append(error)
            self.errors_filt.append(filtered)
            self.samples_peaking_frame.append(samples_peaking)

            frame_counter += 1

        return np.array(y)