def arspec(x, order, nfft=None, fs=1):
    """Compute the spectral density using an AR model.

    An AR model of the signal is estimated through the Yule-Walker
    equations; the estimated AR coefficients are then used to compute the
    spectrum, which can be computed explicitly for AR models.

    Parameters
    ----------
    x : array-like
        Input signal. Must be real and of rank 1.
    order : int
        Order of the LPC computation.
    nfft : int
        Number of FFT points used to evaluate the spectrum. If None
        (default), the length of the signal is used. The AR coefficient
        vector is zero padded to this length by the FFT.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency, e.g. 0.5 is
        the Nyquist limit).

    Returns
    -------
    pxx : array-like
        The psd estimate (one-sided: bins 0 .. nfft // 2).
    fgrid : array-like
        Frequency of each returned bin, i.e. ``k * fs / nfft``.

    Raises
    ------
    ValueError
        If `x` has more than one dimension, is not real, or if `nfft` is
        smaller than the signal size.
    """
    x = np.atleast_1d(x)
    n = x.size

    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    if not nfft:
        nfft = n
    if nfft < n:
        raise ValueError("nfft < signal size not supported yet")

    a, e, _ = lpc(x, order)

    # Number of non-redundant bins of a real signal's spectrum. Both the
    # even case (nfft / 2 + 1) and the odd case ((nfft + 1) / 2) equal
    # nfft // 2 + 1; floor division keeps the slice index an integer on
    # Python 3 as well as Python 2.
    pn = nfft // 2 + 1

    # The spectrum of an AR model is e / |A(f)|^2, where A is the transform
    # of the coefficient vector.
    px = 1 / np.fft.fft(a, nfft)[:pn]
    pxx = np.real(np.conj(px) * px)
    pxx /= fs / e

    # Bin k sits at k * fs / nfft. For odd nfft the last retained bin is
    # below fs / 2, so the grid must not be stretched up to Nyquist.
    fx = np.arange(pxx.size) * (fs / float(nfft))
    return pxx, fx
def arspec(x, order, nfft=None, fs=1):
    """Compute the spectral density using an AR model.

    An AR model of the signal is estimated through the Yule-Walker
    equations; the estimated AR coefficients are then used to compute the
    spectrum, which can be computed explicitly for AR models.

    Parameters
    ----------
    x : array-like
        Input signal. Must be real and of rank 1.
    order : int
        Order of the LPC computation.
    nfft : int
        Number of FFT points used to evaluate the spectrum. If None
        (default), the length of the signal is used. The AR coefficient
        vector is zero padded to this length by the FFT.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency, e.g. 0.5 is
        the Nyquist limit).

    Returns
    -------
    pxx : array-like
        The psd estimate (one-sided: bins 0 .. nfft // 2).
    fgrid : array-like
        Frequency of each returned bin, i.e. ``k * fs / nfft``.

    Raises
    ------
    ValueError
        If `x` has more than one dimension, is not real, or if `nfft` is
        smaller than the signal size.
    """
    x = np.atleast_1d(x)
    n = x.size

    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    if not nfft:
        nfft = n
    if nfft < n:
        raise ValueError("nfft < signal size not supported yet")

    a, e, _ = lpc(x, order)

    # Number of non-redundant bins of a real signal's spectrum. Both the
    # even case (nfft / 2 + 1) and the odd case ((nfft + 1) / 2) equal
    # nfft // 2 + 1; floor division keeps the slice index an integer on
    # Python 3 as well as Python 2.
    pn = nfft // 2 + 1

    # The spectrum of an AR model is e / |A(f)|^2, where A is the transform
    # of the coefficient vector.
    px = 1 / np.fft.fft(a, nfft)[:pn]
    pxx = np.real(np.conj(px) * px)
    pxx /= fs / e

    # Bin k sits at k * fs / nfft. For odd nfft the last retained bin is
    # below fs / 2, so the grid must not be stretched up to Nyquist.
    fx = np.arange(pxx.size) * (fs / float(nfft))
    return pxx, fx
def encode(fn, codebook_lpc, codebook_e):
    """Encode a WAV file as a flat sequence of per-frame codebook indices.

    The file is split into frames with ``make_frames``. For every frame
    three values are appended, in this order, to the output: the ``vq``
    index of the LPC coefficient vector, the ``vq`` index of the square
    root of the LPC prediction error, and the pitch estimate from
    ``get_f0`` (forced to 0 when ``voiced`` reports the frame as unvoiced).

    Parameters
    ----------
    fn : str
        Path of the WAV file to read.
    codebook_lpc : array-like
        Codebook handed to ``vq`` for the LPC coefficient vectors
        (each vector is reshaped to ``(1, LPC + 1)`` before lookup).
    codebook_e : array-like
        Codebook handed to ``vq`` for the error/gain term.

    Returns
    -------
    numpy.ndarray
        ``numpy.asarray`` of the collected values, three per frame.
    """
    out = []

    # wavfile.read returns (sampling rate, samples); only the samples are
    # used. Dividing by 2**15 presumably normalises 16-bit PCM to [-1, 1)
    # -- TODO confirm the input is always int16.
    signal = scipy.io.wavfile.read(fn)[1] / 2.0**15

    for frame in make_frames(signal):
        # Kept inside the loop on purpose: the subtraction is in place, so
        # if make_frames yields views of `signal` the mean may change
        # between iterations.
        frame -= mean(signal)

        # Autocorrelation coefficients: keep lags >= 0 only. Floor division
        # keeps the slice index an integer on Python 3.
        c = correlate(frame, frame, mode='full')
        c = c[len(c) // 2:]

        f0_sam = get_f0(c)
        if not voiced(c):
            f0_sam = 0

        a, e, _ = lpc(frame, LPC)
        e = sqrt(e)[0]

        # Replace the raw parameters by their nearest codebook entries.
        e_idx = vq(e, codebook_e)[0]
        a = a.reshape((1, LPC + 1))
        a_idx = vq(a, codebook_lpc)[0]

        out.append(a_idx)
        out.append(e_idx)
        out.append(f0_sam)

    return numpy.asarray(out)
def atal(x, order, num_coefs):
    """Derive `num_coefs` recursion coefficients from the LPC model of `x`.

    The LPC coefficients ``a`` of order `order` are converted with the
    recursion ``c[m] = -a[m] + sum_k (k / m - 1) * a[k] * c[m - k]``
    (for ``m > order`` the ``-a[m]`` term is absent), starting from
    ``c[0] = a[0]``. The function name suggests this is Atal's
    LPC-to-cepstrum recursion -- TODO confirm.

    Parameters
    ----------
    x : array-like
        Input signal. Must be real and of rank 1.
    order : int
        Order of the LPC computation.
    num_coefs : int
        Number of coefficients to return. May be smaller than
        ``order + 1``; the recursion is then simply truncated.

    Returns
    -------
    c : numpy.ndarray
        Array of length `num_coefs`.

    Raises
    ------
    ValueError
        If `x` has more than one dimension or is not real.
    """
    x = np.atleast_1d(x)

    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")

    a, _, _ = lpc(x, order)

    c = np.zeros(num_coefs)
    if num_coefs == 0:
        # Nothing to fill in; avoids indexing into an empty array.
        return c

    c[0] = a[0]

    # First stage: terms that still have a direct -a[m] contribution.
    # Bounded by the output length so num_coefs <= order does not index
    # past the end of c (each c[m] only depends on earlier entries).
    for m in range(1, min(order, num_coefs - 1) + 1):
        c[m] = -a[m]
        for k in range(1, m):
            c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]

    # Second stage: beyond the model order only the recursive sum remains.
    for m in range(order + 1, num_coefs):
        for k in range(1, order + 1):
            c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]

    return c
def get_lpc(wav_dir):
    """Return the order-12 LPC coefficients of a WAV file as a list.

    Parameters
    ----------
    wav_dir : str
        Path handed to ``scipy.io.wavfile.read`` (despite the name, it is
        used as a file path, not a directory).

    Returns
    -------
    list
        The first value returned by ``lpc`` for the file's samples,
        converted to a list.
    """
    # wavfile.read returns (sample rate, samples); the rate is not needed.
    _, samples = scipy.io.wavfile.read(wav_dir)
    coefficients, _error, _reflection = lpc(numpy.array(samples), 12)
    return list(coefficients)
# NOTE(review): this section was reconstructed from whitespace-flattened
# source; confirm the loop nesting (in particular the level of the
# max_length check) against the original script.

# `files` is not defined in this section; presumably it is a handle opened
# earlier in the script.
files.close()

num = 0
# A 1x1 array acts as the "buffer is empty" sentinel (see the size == 1 test).
posi_data = numpy.ones((1, 1))
for audio_file in file_list:
    # NOTE(review): str.rstrip removes a trailing *set of characters*, not a
    # suffix, so base names ending in any of ".raw\n" / "n.raw\n" characters
    # lose more than the extension. Left unchanged because the naming scheme
    # of file_list is not visible here.
    (rate, audio_ori) = wav.read(audio_file.rstrip(".raw\n") + ".wav")
    with open(audio_file.rstrip("nn.raw\n") + ".wrd") as files:
        seg_info = files.readlines()

    # Window and hop length in samples.
    win = int(window_length * rate)
    hop = int(voca_step * rate)

    for info in seg_info:
        [start, end] = info.split()[0:2]
        # Shave one window off the head and the tail of the segment.
        audio = audio_ori[int(start) + win:int(end) - win]
        for idx in range(0, audio.shape[0] - win, hop):
            chip = audio[idx:idx + win]
            (l, _, _) = lpc(chip, lpc_order)
            m = mfcc.calcMFCC_delta_delta(signal=chip, samplerate=rate, win_length=window_length)[0]
            # One feature row: LPC coefficients followed by the MFCC features.
            cnt_data = numpy.hstack((l, m))
            if posi_data.size == 1:
                posi_data = cnt_data
            else:
                # vstack is the non-deprecated name of row_stack.
                posi_data = numpy.vstack((posi_data, cnt_data))

    # Flush the buffer to a numbered file once it grows past max_length rows.
    if posi_data.shape[0] > max_length:
        numpy.save("posi/data" + str(num), posi_data)
        print("posi/data" + str(num))
        print(audio_file)
        del posi_data
        num = num + 1
        posi_data = numpy.ones((1, 1))

# Save whatever is left in the buffer (the sentinel itself if it is empty).
numpy.save("posi/data" + str(num), posi_data)