def get_lpc(frames, sample_rate=16000, num_coeffs=32, window_type='hann'):
    '''
    Calculates linear prediction coefficients for every frame.

    Parameters:
    frames : overlapping signal frames for short-time analysis; the
        window size is taken from the length of the first frame
    sample_rate : audio sampling rate
    num_coeffs : number of linear prediction coefficients (the estimator
        is configured with order num_coeffs - 1)
    window_type : type of windowing function to apply

    Returns two numpy 2D arrays: LPCs and reflection coefficients.
    Both are transposed, so each column holds the result of one frame.
    '''
    frame_size = len(frames[0])
    lpc_coeffs = []
    reflection_coeffs = []

    lpc_estimator = es.LPC(sampleRate=sample_rate, order=num_coeffs - 1)
    # Honour the caller's window choice; this used to be hard-coded to
    # 'hann', which silently ignored the window_type argument.
    windowing = es.Windowing(type=window_type, size=frame_size)

    for frame in frames:
        # The windowed frame is scaled by 1000 before estimation (kept
        # unchanged from the original implementation).
        lpc, reflection = lpc_estimator(windowing(frame) * 1000)
        lpc_coeffs.append(lpc)
        reflection_coeffs.append(reflection)

    return np.array(lpc_coeffs).T, np.array(reflection_coeffs).T
def compute(self, *args):
    """Return the start times of the detected runs in the input signal.

    The signal is read from the second positional argument. Each
    non-silent frame is filtered with its own normalised LPC coefficients
    to get a residual; the time-reversed residual is then filtered with
    the negated coefficients and reversed back. Samples of the central,
    hop-sized part of the frame whose squared value reaches the threshold
    are detections, and the first sample of every run of consecutive
    detections is reported, in seconds at a fixed 44100 Hz rate.
    """
    x = args[1]
    LPC = es.LPC(order=12, type='regular')

    powerEstimationThreshold = 10
    silenceThreshold = db2pow(-50)
    detectionThreshold = db2pow(30)

    # Only the central hop-sized region of every frame is inspected.
    start_proc = int(frameSize / 2 - hopSize / 2)
    end_proc = int(frameSize / 2 + hopSize / 2)

    y = []
    frames = es.FrameGenerator(x, frameSize=frameSize, hopSize=hopSize,
                               startFromZero=True)
    for frame_number, frame in enumerate(frames):
        # Silent frames still consume a frame index but are not analysed.
        if instantPower(frame) < silenceThreshold:
            continue

        lpc, _ = LPC(frame)
        lpc /= np.max(lpc)

        e = es.IIR(numerator=lpc)(frame)
        e_mf = es.IIR(numerator=-lpc)(e[::-1])[::-1]

        # Thresholding
        th_p = np.max([
            self.robustPower(e, powerEstimationThreshold) * detectionThreshold,
            silenceThreshold,
        ])

        detections = [
            i + start_proc
            for i, v in enumerate(e_mf[start_proc:end_proc] ** 2)
            if v >= th_p
        ]

        # A run starts at the first detection and wherever a detection
        # does not directly follow the previous one.
        run_starts = [
            d for k, d in enumerate(detections)
            if k == 0 or d != detections[k - 1] + 1
        ]

        frame_offset = frame_number * hopSize
        y.extend((start + frame_offset) / 44100. for start in run_starts)

    return esarr(y)
def lpc_envelope(signal_inp, M, fs, freq_size):
    """
    Returns the spectral envelope of a signal using the LPC method

    The LP coefficients of the signal are used as the denominator of an
    all-pole filter whose numerator is a gain taken from the prediction
    residual; the envelope is the magnitude response of that filter in dB.

    Parameters
    ----------
    signal_inp : np.array
        numpy array containing the audio signal
    M : integer
        LPC coefficients order
    fs : float
        Sampling Rate
    freq_size : integer
        Number of points of the whole-circle frequency response

    Returns
    -------
    spectral_envelope : np.array
        The first freq_size // 2 + 1 points of the response, as
        20 * log10 of the magnitude

    References
    ----------
    .. [1] Cross Synthesis Using Cepstral Smoothing or Linear Prediction for Spectral Envelopes, J.O. Smith
           https://ccrma.stanford.edu/~jos/SpecEnv/LPC_Envelope_Example_Speech.html

    """
    # First output of the estimator holds the LP coefficients.
    estimator = ess.LPC(order=M, sampleRate=fs)
    coeffs = estimator(signal_inp)[0]

    # Gain: norm of the residual left after filtering the signal with the
    # LP coefficients as an FIR filter.
    residual = lfilter(b=coeffs, a=1, x=signal_inp)
    gain = np.linalg.norm(residual)

    # Response of the filter gain / A(z) over the whole unit circle.
    _, response = freqz(b=gain, a=coeffs, worN=freq_size, whole=True)

    # Keep the lower half (plus one point) and convert to dB.
    kept_bins = freq_size // 2 + 1
    magnitude = np.abs(response)[:kept_bins]
    return 20 * np.log10(magnitude)
def lpcEnvelope(audioSamples, npts, order):
    '''
    Returns the first half of the npts-point FFT of the LPC coefficients.

    audioSamples : signal the LPC coefficients are estimated from
    npts : FFT size (npts is even number)
    order : LPC order

    The result is the complex spectrum of the coefficient vector itself,
    truncated to its first npts // 2 bins.
    '''
    lpc = ess.LPC(order=order)
    lpcCoeffs = lpc(audioSamples)
    # First estimator output holds the coefficients; fft zero-pads to npts.
    frequencyResponse = fft(lpcCoeffs[0], npts)
    # Integer division: a float slice bound (npts / 2) raises TypeError on
    # Python 3.
    return frequencyResponse[:npts // 2]
def _midi_pitch_histogram(pitches):
    """Return a normalised 128-bin MIDI-note histogram for pitches in Hz.

    Bin 0 holds the share of pitches that could not be mapped to a note
    (outside (0, 12600] Hz or outside the 0..127 note range). With no
    usable pitch at all the histogram is 1.0 in bin 0 and 0.0 elsewhere.
    """
    from math import log
    from numpy import bincount

    midipitches = []
    unknown = 0
    for freq in pitches:
        if 0. < freq <= 12600:
            # Hz -> MIDI note number; 0.69314718055995 is ln(2). The value
            # is truncated to an integer because bincount only accepts
            # non-negative integers.
            note = int(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)
            if 0 <= note < 128:
                midipitches.append(note)
                continue
        unknown += 1

    if midipitches:
        # minlength pads the histogram to the full 128 MIDI notes
        counts = bincount(midipitches, minlength=128)
        # set 0 midi pitch to be the number of pruned values
        counts[0] = unknown
        total = float(counts.sum())
        if total > 0:
            return [val / total for val in counts]

    midipitchhist = [0.] * 128
    midipitchhist[0] = 1.
    return midipitchhist


def compute(audio, pool, options):
    """Compute frame-wise low-level descriptors and store them in `pool`.

    Every descriptor is added under the module-level `namespace` prefix.

    Parameters:
    audio : signal handed to the frame generator
    pool : descriptor pool receiving the values through `pool.add`
    options : mapping read for 'sampleRate', 'frameSize', 'hopSize',
        'windowType' and 'skipSilence'

    Raises ess.EssentiaError when no 'zerocrossingrate' descriptor was
    stored, i.e. when no frame was analysed.
    """
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = ess.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = ess.ZeroCrossingRate()

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = ess.BarkBands(sampleRate=sampleRate)
    centralmoments = ess.CentralMoments()
    crest = ess.Crest()
    centroid = ess.Centroid()
    decrease = ess.Decrease()
    spectral_contrast = ess.SpectralContrast(frameSize=frameSize,
                                             sampleRate=sampleRate,
                                             numberBands=6,
                                             lowFrequencyBound=20,
                                             highFrequencyBound=11000,
                                             neighbourRatio=0.4,
                                             staticDistribution=0.15)
    distributionshape = ess.DistributionShape()
    energy = ess.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = ess.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = ess.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0,
                                           sampleRate=sampleRate)
    energyband_middle_high = ess.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0,
                                            sampleRate=sampleRate)
    energyband_high = ess.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0,
                                     sampleRate=sampleRate)
    flatnessdb = ess.FlatnessDB()
    flux = ess.Flux()
    hfc = ess.HFC()
    mfcc = ess.MFCC()
    rolloff = ess.RollOff()
    rms = ess.RMS()
    strongpeak = ess.StrongPeak()

    # pitch algorithms
    pitch_detection = ess.PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = ess.PitchSalience()

    # dissonance
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = ess.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    progress = Progress(total=frames.num_frames())
    n_frames = 0

    pitches = []

    for frame in frames:
        # silence rate
        pool.add(namespace + '.' + 'silence_rate_60dB', is_silent_threshold(frame, -60))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and es.isSilent(frame):
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_contrast', sc_coeffs)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = ess.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(
            barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
            pitches.append(frame_pitch)
        # NOTE(review): the confidence is stored for every analysed frame,
        # not only for frames with an accepted pitch -- nesting inferred
        # from a source whose indentation was lost; confirm.
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)
        n_frames += 1

    # if there is no 'zerocrossingrate' it means that this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise ess.EssentiaError('This is a silent file!')

    # build pitch value histogram (Hz converted to midi notes)
    midipitchhist = _midi_pitch_histogram(pitches)
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)

    pitch_centralmoments = ess.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(
        pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)

    progress.finish()
import numpy as np import matplotlib.pyplot as plt from scipy.signal import hamming, hanning, triang, blackmanharris, resample import math import sys, os, time from scipy.fftpack import fft, ifft import essentia.standard as ess sys.path.append( os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/')) import utilFunctions as UF lpc = ess.LPC(order=14) N = 512 (fs, x) = UF.wavread('../../../sounds/soprano-E4.wav') first = 20000 last = first + N x1 = x[first:last] X = fft(hamming(N) * x1) mX = 20 * np.log10(abs(X[:N // 2])) coeff = lpc(x1) Y = fft(coeff[0], N) mY = 20 * np.log10(abs(Y[:N // 2])) plt.figure(1, figsize=(9, 5)) plt.subplot(2, 1, 1) plt.plot(np.arange(first, last) / float(fs), x[first:last], 'b', lw=1.5) plt.axis([ first / float(fs), last / float(fs),
def compute(self, *args):
    """Return the start times (in seconds) of frames flagged as peaking.

    The signal is read from the second positional argument. Every frame
    is windowed and peak-normalised, its central hop-sized region is
    predicted from the preceding `order` samples with the frame's LPC
    coefficients, and the absolute prediction error is compared with its
    median-filtered version. A frame is flagged when at least one sample
    of that difference reaches the larger of two adaptive thresholds.
    Times are computed with a fixed 44100 Hz sampling rate.

    Diagnostic data is stored on the instance: `power`, `frames`,
    `errors`, `errors_filt`, `samples_peaking_frame` and `frame_idx`.
    """
    x = args[1]
    LPC = es.LPC(order=order, type='regular')
    W = es.Windowing(size=frame_size, zeroPhase=False, type='triangular')
    predicted = np.zeros(hop_size)

    y = []
    self.frames = []
    self.errors = []
    self.errors_filt = []
    self.samples_peaking_frame = []
    self.frame_idx = []
    self.power = []

    # Central hop-sized region of each frame that is actually analysed.
    start = hop_size // 2
    stop = hop_size * 3 // 2

    frames = es.FrameGenerator(x, frameSize=frame_size, hopSize=hop_size,
                               startFromZero=True)
    # enumerate keeps the frame index in step with the signal position
    # even for frames skipped below; a manually incremented counter was
    # not advanced on that path, which shifted every later timestamp.
    for frame_counter, frame in enumerate(frames):
        self.power.append(es.essentia.instantPower(frame))
        self.frames.append(frame)

        # Un-windowed copy of the analysed region, used for the energy mask.
        frame_un = np.array(frame[start:stop])

        frame = W(frame)
        norm = np.max(np.abs(frame))
        if not norm:
            # All-zero frame: nothing to predict.
            continue
        frame /= norm

        lpc_f, _ = LPC(esarray(frame))
        # Drop the leading coefficient and reverse the rest so they line
        # up with the samples frame[i - order:i].
        lpc_f1 = lpc_f[1:][::-1]

        for idx, i in enumerate(range(start, stop)):
            predicted[idx] = -np.sum(
                np.multiply(frame[i - order:i], lpc_f1))

        error = np.abs(frame[start:stop] - predicted)

        threshold1 = times_thld * np.std(error)

        med_filter = medfilt(error, kernel_size=kernel_size)
        filtered = np.abs(med_filter - error)

        # One flag per sub-frame (power above energy_thld), expanded to
        # one flag per error sample.
        flags = [
            es.essentia.instantPower(frame_un[i:i + sub_frame]) > energy_thld
            for i in range(0, len(error), sub_frame)
        ]
        mask = np.repeat(flags, sub_frame)[:len(error)].astype(bool)

        if not mask.any():
            threshold2 = 1000  # just skip silent frames
        else:
            threshold2 = times_thld * (np.std(error[mask]) +
                                       np.median(error[mask]))

        threshold = np.max([threshold1, threshold2])

        samples_peaking = np.sum(filtered >= threshold)
        if samples_peaking >= 1:
            y.append(frame_counter * hop_size / 44100.)
            self.frame_idx.append(frame_counter)
            # NOTE(review): the diagnostics below are stored for flagged
            # frames only -- nesting inferred from a source whose
            # indentation was lost; confirm. self.frames also receives
            # the raw frame at the top of the loop.
            self.frames.append(frame)
            self.errors.append(error)
            self.errors_filt.append(filtered)
            self.samples_peaking_frame.append(samples_peaking)

    return np.array(y)