def get_formants(data, rate):
    """Estimate candidate formant frequencies (Hz) of one audio frame.

    Windows the frame, applies a pre-emphasis high-pass filter, fits an
    LPC model, and converts the analysis-filter root angles to frequencies.
    Returns the non-zero frequencies sorted ascending.
    """
    from audiolazy import lpc
    import math

    # Hamming window tapers the frame edges to reduce spectral leakage.
    windowed = data * numpy.hamming(len(data))
    # High-pass / pre-emphasis stage before LPC analysis.
    emphasized = scipy.signal.lfilter([1], [1., 0.63], windowed)

    # Rule of thumb: LPC order ~ 2 + sampling rate in kHz.
    order = int(2 + rate / 1000)
    analysis = lpc(emphasized, order)

    # Keep one root per complex-conjugate pair (non-negative imaginary part).
    roots = [r for r in numpy.roots(analysis.numerator) if numpy.imag(r) >= 0]

    # Root angles are in rad/sample; scale to Hz.
    angles = numpy.arctan2(numpy.imag(roots), numpy.real(roots))
    freqs = angles * (rate / (2 * math.pi))
    return sorted(f for f in freqs if f != 0)
def LPC(file_path='audio_files/1.wav', order=13):
    """Read a WAV file and return its LPC coefficients.

    The signal is pre-emphasized with y[n] = x[n] - 0.97*x[n-1] before the
    LPC fit; the leading 1.0 tap of the analysis filter is dropped, so the
    result holds `order` coefficients.
    """
    _, samples = wav.read(file_path)
    # Pre-emphasis (first sample passed through unchanged).
    emphasized = np.append(samples[0], samples[1:] - 0.97 * samples[:-1])
    analysis_filter = lpc(emphasized, order=order)
    return analysis_filter.numerator[1:]
def LPC():
    """Interactively compute and print order-13 LPC features for each
    numbered WAV file (1.wav, 2.wav, ...) in a user-specified folder."""
    folder = input('Give the name of the folder that you want to read data: ')
    amount = input('Give the number of samples in the specific folder: ')
    for index in range(1, int(amount) + 1):
        wav = '/' + folder + '/' + str(index) + '.wav'
        print(wav)
        # preEmphasis is expected to return (emphasized, raw signal, rate).
        emphasized, signal, rate = preEmphasis(wav)
        coeffs = lpc(emphasized, order=13).numerator[1:]
        print(len(coeffs))
        print(coeffs)
def get_formants(x, Fs):
    """Estimate candidate formant frequencies (Hz) of signal `x` at rate `Fs`.

    Windows the signal, pre-emphasizes it, fits an order-8 LPC model, and
    converts the analysis-filter root angles to frequencies. Returns the
    sorted frequencies above 100 Hz (sub-100 Hz roots are discarded as
    artifacts of the DC/real-axis roots).
    """
    # Hamming window to smooth frame-edge discontinuities before LPC.
    N = len(x)
    w = numpy.hamming(N)
    # Apply window and high-pass (pre-emphasis) filter.
    x1 = x * w
    x1 = lfilter([1], [1., 0.63], x1)
    # Get LPC. `A` maps tap index -> coefficient; `k` holds the second value
    # returned by this project's lpc() (unused here).
    A, k = lpc(x1, order=8)
    # NOTE(review): iteration order of A.items() is assumed to follow tap
    # order — confirm against the lpc() implementation.
    coeffs = [float(v) for _, v in A.items()]
    # Keep one root per complex-conjugate pair.
    rts = numpy.roots(coeffs)
    rts = [r for r in rts if numpy.imag(r) >= 0]
    # Root angles (rad/sample).
    angz = numpy.arctan2(numpy.imag(rts), numpy.real(rts))
    # Convert to Hz, sort, and drop sub-100 Hz candidates.
    frqs = sorted(angz * (Fs / (2 * math.pi)))
    return [f for f in frqs if f > 100]
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # Created on Mon Mar 04 2013 # danilo [dot] bellini [at] gmail [dot] com """ LPC plot with DFT, showing two formants (magnitude peaks) """ from audiolazy import sHz, sin_table, str2freq, lpc import pylab rate = 22050 s, Hz = sHz(rate) size = 512 table = sin_table.harmonize({1: 1, 2: 5, 3: 3, 4: 2, 6: 9, 8: 1}).normalize() data = table(str2freq("Bb3") * Hz).take(size) filt = lpc(data, order=14) # Analysis filter gain = 1e-2 # Gain just for alignment with DFT # Plots the synthesis filter # - If blk is given, plots the block DFT together with the filter # - If rate is given, shows the frequency range in Hz (gain / filt).plot(blk=data, rate=rate, samples=1024, unwrap=False) pylab.ioff() pylab.show()
# NOTE(review): this `return` is the tail of a function whose `def` line is
# above this chunk; it belongs inside that function's scope.
    return cooef


if __name__ == '__main__':
    i = 0
    filename = "learn_set//wlacz//" + str(i + 1) + ".wav"
    # NOTE(review): `Fs` is not defined anywhere visible here — presumably a
    # module-level constant; verify before running.
    sampleRate, signal = PlotModule.readWav(filename, Fs)
    rate = 22050
    s, Hz = sHz(rate)  # s/Hz conversion constants for this sample rate
    size = 512
    table = signal  # sin_table.harmonize({1: 1, 2: 5, 3: 3, 4: 2, 6: 9, 8: 1}).normalize()
    # NOTE(review): `table` is now the raw signal (an array), yet the next
    # line calls it like the audiolazy table object from the original demo —
    # this looks like it would raise TypeError; confirm intent.
    data = table(str2freq("Bb3") * Hz).take(size)
    filt = lpc(signal, order=14)  # Analysis filter over the full signal
    gain = 1e-2  # Gain just for alignment with DFT
    # Plots the synthesis filter
    # - If blk is given, plots the block DFT together with the filter
    # - If rate is given, shows the frequency range in Hz
    (gain / filt).plot(blk=data, rate=rate, samples=1024, unwrap=False)
    pylab.show()
    # for i in range(10):
    #     filename = "learn_set//wlacz//"+str(i+1)+".wav"
    #
    #     sampleRate, signal = PlotModule.readWav(filename, Fs)
    #
    #
    #     cooef = getCooefVect(signal)
def _compute_formants(self, audio_buffer): """ Computes the frequencies of formants of the window of audio data, along with their bandwidths. A formant is a frequency band over which there is a concentration of energy. They correspond to tones produced by the vocal tract and are therefore often used to characterize vowels, which have distinct frequencies. In the task of speaker identification, it can be used to characterize a person's speech patterns. This implementation is based on the Matlab tutorial on Estimating Formants using LPC (Linear Predictive Coding) Coefficients: http://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html. """ # Get Hamming window. More on window functions can be found at https://en.wikipedia.org/wiki/Window_function # The idea of the Hamming window is to smooth out discontinuities at the edges of the window. # Simply multiply to apply the window. N = len(audio_buffer) Fs = 8000 # sampling frequency hamming_window = np.hamming(N) window = audio_buffer * hamming_window # Apply a pre-emphasis filter; this amplifies high-frequency components and attenuates low-frequency components. # The purpose in voice processing is to remove noise. filtered_buffer = lfilter([1], [1., 0.63], window) # Speech can be broken down into (1) The raw sound emitted by the larynx and (2) Filtering that occurs when transmitted from the larynx, defined by, for instance, mouth shape and tongue position. # The larynx emits a periodic function defined by its amplitude and frequency. # The transmission is more complex to model but is in the form 1/(1-sum(a_k * z^-k)), where the coefficients # a_k sufficiently encode the function (because we know it's of that form). # Linear Predictive Coding is a method for estimating these coefficients given a pre-filtered audio signal. 
# These value are called the roots, because the are the points at which the difference # from the actual signal and the reconstructed signal (using that transmission function) is closest to 0. # See http://dsp.stackexchange.com/questions/2482/speech-compression-in-lpc-how-does-the-linear-predictive-filter-work-on-a-gene. # Get the roots using linear predictive coding. # As a rule of thumb, the order of the LPC should be 2 more than the sampling frequency (in kHz). ncoeff = 2 + Fs / 1000 A = lpc(filtered_buffer, int(ncoeff)) A = np.array([list(A)[0][i] for i in range(0, 10)]) roots = np.roots(A) roots = [r for r in roots if np.imag(r) >= 0] # Get angles from the roots. Each root represents a complex number. The angle in the # complex coordinate system (where x is the real part and y is the imaginary part) # corresponds to the "frequency" of the formant (in rad/s, however, so we need to convert them). # Note it really is a frequency band, not a single frequency, but this is a simplification that is acceptable. angz = np.arctan2(np.imag(roots), np.real(roots)) # Convert the angular frequencies from rad/sample to Hz; then calculate the # bandwidths of the formants. The distance of the roots from the unit circle # gives the bandwidths of the formants (*Extra credit* if you can explain this!). unsorted_freqs = angz * (Fs / (2 * math.pi)) # Let's sort the frequencies so that when we later compare them, we don't overestimate # the difference due to ordering choices. freqs = sorted(unsorted_freqs) # also get the indices so that we can get the bandwidths in the same order indices = np.argsort(unsorted_freqs) sorted_roots = np.asarray(roots)[indices] #compute the bandwidths of each formant bandwidths = -1 / 2. * (Fs / (2 * math.pi)) * np.log(np.abs(sorted_roots)) if self.debug: print("Identified {} formants.".format(len(freqs))) return freqs, bandwidths
def LPC(self, signal):
    """Return the order-`self.order` LPC coefficients of `signal` as a
    NumPy array, omitting the leading 1.0 tap of the analysis filter."""
    analysis_filter = lpc(signal, self.order)
    return np.array(analysis_filter.numerator[1:])
if __name__ == '__main__':
    i = 0
    filename = "learn_set//wlacz//"+str(i+1)+".wav"
    # NOTE(review): `Fs` is not defined anywhere visible here — presumably a
    # module-level constant; verify before running.
    sampleRate, signal = PlotModule.readWav(filename, Fs)
    rate = 22050
    s, Hz = sHz(rate)  # s/Hz conversion constants for this sample rate
    size = 512
    table = signal  # sin_table.harmonize({1: 1, 2: 5, 3: 3, 4: 2, 6: 9, 8: 1}).normalize()
    # NOTE(review): `table` is now the raw signal (an array), yet the next
    # line calls it like the audiolazy table object from the original demo —
    # this looks like it would raise TypeError; confirm intent.
    data = table(str2freq("Bb3") * Hz).take(size)
    filt = lpc(signal, order=14)  # Analysis filter over the full signal
    gain = 1e-2  # Gain just for alignment with DFT
    # Plots the synthesis filter
    # - If blk is given, plots the block DFT together with the filter
    # - If rate is given, shows the frequency range in Hz
    (gain / filt).plot(blk=data, rate=rate, samples=1024, unwrap=False)
    pylab.show()
    # for i in range(10):
    #     filename = "learn_set//wlacz//"+str(i+1)+".wav"
    #
    #     sampleRate, signal = PlotModule.readWav(filename, Fs)
    #
    #
    #     cooef = getCooefVect(signal)
def get_lpc(self, signal):
    """Return `self.n_lpc` LPC coefficients of `signal`, dropping the
    leading 1.0 tap of the analysis-filter numerator."""
    analysis = audiolazy.lpc(signal, self.n_lpc)
    coefficients = analysis.numerator
    return coefficients[1:]
# NOTE(review): Python 2 code (bare `print` statements) from a converted
# notebook; `plt`, `lz`, `fs`, and the three data_* arrays are defined in
# earlier cells not visible here.

# Plot the three source recordings on one figure for visual comparison.
plt.plot(data_chest, label = 'chest')
plt.plot(data_falsetto, label = 'falsetto')
plt.plot(data_background, label = 'background')
plt.legend()

print len(data_chest)
# Signals are analyzed at a quarter of the original sampling frequency.
print "new sampling frequency: ", fs / 4.0
s, Hz = lz.sHz(fs / 4.0)

# <codecell>

# Fit LPC models to each recording; the falsetto uses a lower order.
lpc_chest = lz.lpc(data_chest, order = 14)
lpc_falsetto = lz.lpc(data_falsetto, order = 7)
lpc_background = lz.lpc(data_background, order = 14)

# Residual (prediction) error of each fit.
print lpc_chest.error
print lpc_falsetto.error
print lpc_background.error

# <codecell>

plt.close('all')
maxf = 5000.0  # upper frequency bound (Hz) intended for the plot below
# fig = (1 / lpc_chest).plot(rate = s * 1, samples = maxf / fs * Nfft, unwrap = False, max_freq = maxf * Hz / 1)