Beispiel #1
0
def arspec(x, order, nfft=None, fs=1):
    """Compute the spectral density using an AR model.

    An AR model of the signal is estimated through the Yule-Walker equations;
    the estimated AR coefficients are then used to compute the spectrum, which
    can be computed explicitly for AR models.

    Parameters
    ----------
    x : array-like
        Input signal. Must be real and at most rank 1.
    order : int
        Order of the LPC computation.
    nfft : int
        Size of the FFT used to evaluate the spectrum. If None (default), the
        length of the signal is used. Values smaller than the signal length
        are rejected.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency, e.g. 0.5 is the
        Nyquist limit).

    Returns
    -------
    pxx : array-like
        The one-sided psd estimate (``nfft // 2 + 1`` points).
    fgrid : array-like
        Frequency of each point of ``pxx``, i.e. ``k * fs / nfft``.

    Raises
    ------
    ValueError
        If ``x`` has more than one dimension, is complex, or is longer than
        ``nfft``.
    """

    x = np.atleast_1d(x)
    n = x.size

    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    # Any falsy nfft (None, but also 0) selects the default size.
    if not nfft:
        nfft = n
    if nfft < n:
        raise ValueError("nfft < signal size not supported yet")

    a, e, _ = lpc(x, order)

    # Number of non-negative frequency bins of an nfft-point FFT. Floor
    # division covers both parities (even: nfft/2 + 1, odd: (nfft + 1)/2) and
    # keeps the value an int so it stays usable as a slice bound.
    pn = nfft // 2 + 1

    # AR spectrum: e / |A(f)|^2, scaled by the sampling rate.
    px = 1 / np.fft.fft(a, nfft)[:pn]
    pxx = np.real(np.conj(px) * px)
    pxx /= fs / e

    # Bin k of an nfft-point FFT sits at k * fs / nfft. For odd nfft the last
    # kept bin is below fs / 2, so the grid must not be stretched to Nyquist.
    fx = np.arange(pxx.size) * (float(fs) / nfft)
    return pxx, fx
Beispiel #2
0
def arspec(x, order, nfft=None, fs=1):
    """Compute the spectral density using an AR model.

    An AR model of the signal is estimated through the Yule-Walker equations;
    the estimated AR coefficients are then used to compute the spectrum, which
    can be computed explicitly for AR models.

    Parameters
    ----------
    x : array-like
        Input signal; only real, rank-1 input is accepted.
    order : int
        Order of the LPC computation.
    nfft : int
        Size of the FFT on which the spectrum is evaluated. If None (default),
        the length of the signal is used. ``nfft`` smaller than the signal
        length is not supported.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency, e.g. 0.5 is the
        Nyquist limit).

    Returns
    -------
    pxx : array-like
        The psd estimate on the non-negative frequency bins.
    fgrid : array-like
        Frequency grid matching ``pxx`` (bin ``k`` is at ``k * fs / nfft``).

    Raises
    ------
    ValueError
        For multi-dimensional input, complex input, or ``nfft`` smaller than
        the signal length.
    """

    x = np.atleast_1d(x)
    n = x.size

    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    # nfft of None (or any other falsy value) falls back to the signal length.
    if not nfft:
        nfft = n
    if nfft < n:
        raise ValueError("nfft < signal size not supported yet")

    a, e, _ = lpc(x, order)

    # Count of bins from DC up to (and, for even nfft, including) Nyquist.
    # Integer floor division gives the right count for even and odd nfft
    # alike, and unlike true division it yields a valid slice index.
    pn = nfft // 2 + 1

    # Inverse of the AR polynomial's frequency response, squared in
    # magnitude and scaled by the prediction error and the sampling rate.
    px = 1 / np.fft.fft(a, nfft)[:pn]
    pxx = np.real(np.conj(px) * px)
    pxx /= fs / e

    # Use the true FFT bin spacing fs / nfft: with odd nfft the highest bin
    # lies below fs / 2, so a linspace ending at Nyquist would be wrong.
    fx = np.arange(pxx.size) * (float(fs) / nfft)
    return pxx, fx
Beispiel #3
0
def encode(fn, codebook_lpc, codebook_e):
    """Encode a WAV file as a flat sequence of per-frame codebook indices.

    The samples are read from ``fn``, split into frames by ``make_frames``,
    and for every frame three values are appended to the output, in this
    order: the index of the LPC coefficient vector in ``codebook_lpc``, the
    index of the square root of the LPC prediction error in ``codebook_e``,
    and the pitch estimate returned by ``get_f0`` (forced to 0 when
    ``voiced`` reports the frame as unvoiced).

    Parameters
    ----------
    fn : str
        Path of the WAV file to encode.
    codebook_lpc : array-like
        Codebook passed to ``vq`` for the LPC coefficient vectors.
    codebook_e : array-like
        Codebook passed to ``vq`` for the error (gain) values.

    Returns
    -------
    numpy.ndarray
        ``numpy.asarray`` of the collected
        ``[a_idx, e_idx, f0, a_idx, e_idx, f0, ...]`` list.
    """
    out = []
    # wavfile.read returns (sample_rate, samples); only the samples are used.
    # NOTE(review): the 2**15 scaling presumably normalizes 16-bit PCM to
    # [-1, 1) -- confirm the expected input format.
    signal = scipy.io.wavfile.read(fn)[1] / 2.0**15

    for frame in make_frames(signal):
        # Subtract the mean of the whole signal (not of the frame), in place.
        frame -= mean(signal)

        # Autocorrelation coefficients: keep the second half of the full
        # correlation, starting at the middle sample. Floor division keeps
        # the index an int on Python 3 as well.
        c = correlate(frame, frame, mode='full')
        c = c[len(c) // 2:]

        f0_sam = get_f0(c)
        if not voiced(c):
            f0_sam = 0

        a, e, _ = lpc(frame, LPC)
        e = sqrt(e)[0]

        # Look both values up in their codebooks.
        e_idx = vq(e, codebook_e)[0]
        a = a.reshape((1, LPC + 1))
        a_idx = vq(a, codebook_lpc)[0]

        out.append(a_idx)
        out.append(e_idx)
        out.append(f0_sam)
    return numpy.asarray(out)
Beispiel #4
0
def atal(x, order, num_coefs):
    """Derive ``num_coefs`` recursion coefficients from the LPC model of ``x``.

    The LPC coefficients ``a`` of order ``order`` are computed with ``lpc``
    and expanded with the recursion::

        c[0] = a[0]
        c[m] = -a[m] + sum_{k=1}^{m-1} (k/m - 1) * a[k] * c[m-k]   (1 <= m <= order)
        c[m] =         sum_{k=1}^{order} (k/m - 1) * a[k] * c[m-k] (m > order)

    NOTE(review): this looks like the LPC-to-cepstrum recursion (the name
    suggests Atal's method) -- confirm against the intended reference.

    Parameters
    ----------
    x : array-like
        Input signal; only real, rank-1 input is accepted.
    order : int
        Order of the LPC computation.
    num_coefs : int
        Number of coefficients to return. May be smaller than ``order + 1``,
        in which case the recursion is simply stopped early.

    Returns
    -------
    numpy.ndarray
        Array of length ``num_coefs``.

    Raises
    ------
    ValueError
        If ``x`` has more than one dimension or is complex.
    """
    x = np.atleast_1d(x)
    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")

    a, _, _ = lpc(x, order)
    c = np.zeros(num_coefs)
    if c.size == 0:
        return c

    c[0] = a[0]
    # Never write past the end of c when fewer than order + 1 coefficients
    # are requested.
    last_direct = min(order, num_coefs - 1)
    for m in range(1, last_direct + 1):
        c[m] = -a[m]
        for k in range(1, m):
            c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]
    # Beyond the model order there is no direct -a[m] term.
    for m in range(order + 1, num_coefs):
        for k in range(1, order + 1):
            c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]
    return c
def get_lpc(wav_dir, order=12):
    """Return the LPC coefficients of a WAV file as a list.

    Parameters
    ----------
    wav_dir : str
        Path of the WAV file to read (despite the name, this is passed
        directly to ``scipy.io.wavfile.read``).
    order : int
        Order of the LPC computation. Defaults to 12, the value that used to
        be hard-coded.

    Returns
    -------
    list
        The first value returned by ``lpc`` (the coefficient array),
        converted with ``list``.
    """
    # wavfile.read returns (sample_rate, samples); the rate is not needed.
    _, audio = scipy.io.wavfile.read(wav_dir)
    audio_array = numpy.array(audio)
    coefficients, _, _ = lpc(audio_array, order)
    return list(coefficients)
Beispiel #6
0
files.close()

# Build feature matrices (LPC coefficients + MFCC features per window) from
# the labelled segments of every listed recording, and save them to
# "posi/data<num>.npy" in batches once more than max_length rows are held.
num = 0
# A (1, 1) array of ones is the "nothing accumulated yet" sentinel.
posi_data = numpy.ones((1, 1))
for audio_file in file_list:
    # Derive the companion file names by removing exact suffixes.
    # str.rstrip(chars) strips a *set of characters*, so using it here could
    # also eat trailing 'n', 'r', 'a', 'w' or '.' characters of the stem.
    # NOTE(review): assumes entries look like "<stem>nn.raw" with the labels
    # in "<stem>.wrd" and the audio in "<stem>nn.wav" -- confirm.
    wav_stem = audio_file.rstrip("\r\n")
    if wav_stem.endswith(".raw"):
        wav_stem = wav_stem[:-len(".raw")]
    wrd_stem = wav_stem[:-len("nn")] if wav_stem.endswith("nn") else wav_stem

    (rate, audio_ori) = wav.read(wav_stem + ".wav")
    # The with-block closes the file; no explicit close() is needed.
    with open(wrd_stem + ".wrd") as files:
        seg_info = files.readlines()

    # Window and hop sizes in samples; constant for a given file.
    window_samples = int(window_length * rate)
    step_samples = int(voca_step * rate)

    for info in seg_info:
        [start, end] = info.split()[0:2]
        # Shave one window off the head and the tail of the segment.
        audio = audio_ori[int(start) + window_samples:int(end) - window_samples]
        for idx in range(0, audio.shape[0] - window_samples, step_samples):
            chip = audio[idx:idx + window_samples]
            (l, _, _) = lpc(chip, lpc_order)
            m = mfcc.calcMFCC_delta_delta(signal=chip, samplerate=rate, win_length=window_length)[0]
            cnt_data = numpy.hstack((l, m))
            if posi_data.size == 1:
                posi_data = cnt_data
            else:
                # vstack is the function numpy.row_stack was an alias of.
                posi_data = numpy.vstack((posi_data, cnt_data))
        # Flush the batch after a segment once it has grown large enough.
        if posi_data.shape[0] > max_length:
            numpy.save("posi/data"+str(num), posi_data)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("posi/data"+str(num))
            print(audio_file)
            del posi_data
            num = num + 1
            posi_data = numpy.ones((1,1))

# Save whatever is left (the sentinel itself if nothing was accumulated).
numpy.save("posi/data"+str(num), posi_data)