Example #1
def test_lpc_multi(y_multi):
    y, sr = y_multi

    Lall = librosa.lpc(y, order=6)
    L0 = librosa.lpc(y[0], order=6)
    L1 = librosa.lpc(y[1], order=6)

    assert np.allclose(Lall[0], L0)
    assert np.allclose(Lall[1], L1)
    assert not np.allclose(L0, L1)
Example #2
def calculateRPCC(audio):
    sample_frequency = 16000
    frame_length = sample_frequency // 4  # 250 ms
    low_frequency = 20
    high_frequency = 7600
    num_mel_bins = 30

    # Calculate the LP residual signal
    lpc_coefficients = librosa.lpc(audio, order=22)
    # np.hstack builds the FIR prediction filter [0, -a1, ..., -ap];
    # a plain list + array would broadcast-add instead of concatenating.
    s_n_hat = scipy.signal.lfilter(
        np.hstack([[0], -1 * lpc_coefficients[1:]]), [1], audio)
    r_n = audio - s_n_hat

    # Hilbert transform -> analytic signal -> residual phase
    analytical_signal = scipy.signal.hilbert(r_n)
    residual_phase = analytical_signal.real / np.abs(analytical_signal)

    # STFT magnitude
    win_length = sample_frequency // 4
    hop_length = sample_frequency // 10
    n_fft = 4096 # QUESTION: Should this be changed?
    magnitude_spectrum = np.abs(librosa.stft(residual_phase, n_fft=n_fft, win_length=win_length, hop_length=hop_length))

    # Warp to mel
    mel = librosa.filters.mel(sr=sample_frequency, n_fft=n_fft, n_mels=num_mel_bins, fmin=low_frequency, fmax=high_frequency)
    mel_warped_signal = mel.dot(magnitude_spectrum)

    # Log of the mel spectrum (small epsilon guards against log(0))
    log_spectrum = np.log(mel_warped_signal + 1e-10)

    # DCT along the mel-bin axis to obtain cepstral coefficients
    dct_spectrum = scipy.fftpack.dct(log_spectrum, axis=0)

    return dct_spectrum
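A quick smoke test for the pipeline above; the white-noise input and its length are illustrative assumptions (numpy, scipy, and librosa imported as the snippet assumes):

import numpy as np

# Hypothetical input: one second of noise at the 16 kHz rate assumed above.
audio = np.random.randn(16000).astype(np.float32)
rpcc = calculateRPCC(audio)
print(rpcc.shape)  # (num_mel_bins, n_frames): cepstral coefficients per frame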
Example #3
def lpc_emotion_upload():
    entry = dict()
    wav_files = []
    y, sr = librosa.load('pickles/catalyst.wav')
    lpc = librosa.lpc(y, order=5)
    for no in range(0, len(lpc)):
        entry['LIB_LPC{0}'.format(no)] = lpc[no]
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    # Select out pitches with high energy
    pitches = pitches[magnitudes > np.median(magnitudes)]
    pit = librosa.pitch_tuning(pitches)

    entry['pitch'] = pit

    wav_files.append(entry)
    wav_df = pd.DataFrame(wav_files)
    lpc_clf = joblib.load('pickles/lpc_model.sav')

    bar = pd.DataFrame(lpc_clf.predict_proba(wav_df))
    bar.columns = lpc_clf.classes_
    bar_t = bar.T
    bar_t.columns = ['values']

    fig = go.Figure(data=[
        go.Pie(labels=lpc_clf.classes_, values=bar_t['values'], hole=.3),
    ])
    return lpc_clf.predict(wav_df), fig
Example #4
def extract_feature(file_name):

    result = np.array([])
    y, sr = librosa.load(file_name)
    LPC = librosa.lpc(y, order=16)
    result = np.hstack((result, LPC))  # collect the coefficients as the feature
    return result
Example #5
File: data_utils.py Project: cncn0069/AI
def raw_data_processing(data_path):
    data = list()
    label = list()

    file_dir = os.listdir(path=data_path)

    for i in range(len(file_dir)):
        file_list = os.listdir(os.path.join(data_path, file_dir[i]))
        for j in range(len(file_list)):
            file_name = os.path.join(data_path, file_dir[i], file_list[j])
            print("file_name: ", file_name)
            audio_file, sr = librosa.load(
                path=file_name, sr=int(mediainfo(file_name)['sample_rate']))

            try:
                data.append(librosa.lpc(audio_file, order=100))
                label.append(i)
            except Exception:
                print("Error occurred at ", file_name)
                continue

    data = np.asarray(data)
    label = np.asarray(label)

    return data, label
Example #6
    def get_wav_df(self):
        wav_files = []
        for wav in os.listdir(self.wav_dir):
            if wav.endswith('.wav'):
                entry = dict()
                entry['Session'] = wav

                fs, signal = swav.read(self.wav_dir + '/' + wav)
                y, sr = librosa.load(self.wav_dir + '/' + wav)
                lpc = librosa.lpc(y, order=5)
                for no in range(0, len(lpc)):
                    entry['LIB_LPC{0}'.format(no)] = lpc[no]
                pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
                # Select out pitches with high energy
                pitches = pitches[magnitudes > np.median(magnitudes)]
                pit = librosa.pitch_tuning(pitches)

                entry['pitch'] = pit

                wav_files.append(entry)

        wav_df = pd.DataFrame(wav_files)
        return wav_df
Example #7
def extract_LLD_from_audio(audio, fs):
    # MFCC
    mfcc = librosa.feature.mfcc(y=audio,
                                sr=fs,
                                n_fft=N_FFT,
                                hop_length=HOP_LENGTH,
                                center=False).transpose()
    mfcc_hsf = extract_HSF(mfcc)

    # LPC
    lpc = librosa.lpc(audio, order=16)

    # Mel-Spectrogram
    spect = librosa.feature.melspectrogram(y=audio,
                                           sr=fs,
                                           n_fft=N_FFT,
                                           hop_length=HOP_LENGTH,
                                           center=False)
    spect = librosa.power_to_db(spect, ref=np.max).transpose()
    spect_hsf = extract_HSF(spect)

    # Other features
    f0 = get_F_0(audio, fs)[0]
    hnr = get_HNR(audio, fs)

    return np.asarray(mfcc), np.asarray(mfcc_hsf), np.asarray(lpc), np.asarray(
        spect), np.asarray(spect_hsf), np.asarray([f0, hnr])
Example #8
def get_lpc_feature(input_audio, sampling_rate, order = 20, preemphasis = True, includeDerivatives = True, win = np.hamming(160), inc = 80):
    # audio, sr = librosa.load(input_audio, sr=sampling_rate)

    audio = input_audio

    # Pre-emphasis filter (zero is at 50 Hz)
    if(preemphasis):
        audio = signal.lfilter([1, -np.exp(-2*np.pi*50/sampling_rate)],1,audio)

    # Get frames from input audio
    frame = get_frame_from_file(audio, win=win, inc=inc, sr=sampling_rate, n_channels=1, duration = None)
    c = np.zeros((frame.shape[1], order))

    # Compute LPC coefficients
    for i in range(c.shape[0]):
        lpc_ftr = librosa.lpc(frame[:,i], order=order)
        c[i,:] = lpc_ftr[1:]
    nf = c.shape[0]

    # Calculate derivative
    if(includeDerivatives):
      vf=np.arange(4,-5,-1)/60
      ww=np.zeros(4, dtype=int)
      cx = np.vstack((c[ww,:], c, c[(nf-1)*(ww+1),:]))
      filtered_cx = signal.lfilter(vf,1,np.transpose(cx).flatten())
      dc = np.reshape(filtered_cx,(nf+8,order),order='F')
      dc = np.delete(dc, np.arange(0,8), axis=0)
      c = np.hstack((c,dc))
      c = np.transpose(c)
      c = c.astype(np.single)

    return c
Example #9
def lpc_coeffs(myData):
    MyFs = constants.fs
    MyCoeffs = 512
    Lpcs = librosa.lpc(myData, order=MyCoeffs)
    # freqz returns (frequencies, complex response) of the all-pole filter 1/A(z)
    MyLocs, MyFreqs = freqz(1, Lpcs, worN=MyCoeffs, fs=MyFs)
    MyFreqs = 20 * np.log10(np.abs(MyFreqs))  # spectral envelope in dB
    return MyLocs, MyFreqs
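A hedged usage sketch of the same envelope computation; the standalone rate and noise input stand in for constants.fs and real audio, and the order is illustrative:

import numpy as np
import librosa
from scipy.signal import freqz

fs = 16000                              # stand-in for constants.fs
y = np.random.randn(4096)
a = librosa.lpc(y, order=32)            # a modest order; 512 above is unusually high
w, h = freqz(1, a, worN=512, fs=fs)     # all-pole response 1/A(z)
print(w[np.argmax(20 * np.log10(np.abs(h)))])  # frequency of the envelope peak, Hz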
Example #10
def lpc_filtering(regions_lpc, audio_f, audio_noised, coef_number):
    lpc_coeffs = []
    error = []
    for start, end in regions_lpc:
        a = librosa.lpc(audio_noised[start:end], order=coef_number)
        lpc_coeffs.append(a)
        error.extend(scipy.signal.lfilter(a, [1], audio_f[start:end]))
    error = np.array(error)
    return error, lpc_coeffs
Example #11
File: test_core.py Project: bubaic/librosa
def test_lpc_simple():
    srand()

    n = 5000
    est_a = np.zeros((n, 6))
    truth_a = [1, 0.5, 0.4, 0.3, 0.2, 0.1]
    for i in range(n):
        noise = np.random.randn(1000)
        filtered = scipy.signal.lfilter([1], truth_a, noise)
        est_a[i, :] = librosa.lpc(filtered, order=5)
    assert np.allclose(truth_a, np.mean(est_a, axis=0), rtol=0, atol=1e-3)
Example #12
def lp(y, order=16):
    """
    Uses librosa.lpc to calculate linear prediction coefficients
    via Burg's method, then uses scipy.signal.lfilter to compute the LP
    residual by inverse filtering.

    """

    a = librosa.lpc(y, order=order)
    # Predict each sample from its past, then subtract: e[n] = y[n] - y_hat[n].
    # (np.hstack builds the FIR coefficients; a plain list + array would
    # broadcast-add instead of concatenating.)
    y_hat = scipy.signal.lfilter(np.hstack([[0], -1 * a[1:]]), [1], y)
    return y - y_hat
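An illustrative sanity check (the AR coefficients and lengths are arbitrary): inverse-filtering a synthetic AR process should roughly recover the driving white noise.

import numpy as np
import librosa
import scipy.signal

truth_a = [1.0, -0.5, 0.2]                       # arbitrary stable AR(2) model
noise = np.random.randn(8000)
y = scipy.signal.lfilter([1.0], truth_a, noise)  # synthesize the AR process
e = lp(y, order=2)                               # residual from the function above
print(np.var(e) / np.var(noise))                 # should be close to 1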
Example #13
def lpc(y, sr, args):
    # LPC coefficients
    coefs = librosa.lpc(y, order=args.order)

    # White noise excitation
    wn = np.random.uniform(size=int(sr * args.sec))

    # Filter the noise through the all-pole model 1/A(z)
    y_out = sig.lfilter([1.0], coefs, wn)
    y_out = y_out / np.max(np.abs(y_out))  # peak-normalize
    return y_out
Example #14
def formant_signal(x, order):
    '''
    formant_signal(x,order) takes wave amplitude array (x) and returns the LPC approximation
    (s_formant) based on a linear autoregressive model to a given specified order (order).

    s_formant can be understood as the (sourceless) amplitude due to the vocal formants of the signal x.
    '''

    a = librosa.lpc(x, order=order)
    b = np.hstack([[0], -1 * a[1:]])
    s_formant = sp.signal.lfilter(b, [1], x)

    return s_formant
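A hedged usage sketch with a synthetic two-tone "vowel"; the frequencies and rate are illustrative, not part of the original example:

import numpy as np
import scipy as sp
import scipy.signal  # the function above uses sp.signal.lfilter
import librosa

sr = 16000
t = np.arange(sr) / sr
x = np.sin(2 * np.pi * 700 * t) + 0.5 * np.sin(2 * np.pi * 1200 * t)
s = formant_signal(x, 16)
print(x.shape, s.shape)  # same length; s is the one-step LPC prediction of x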
Example #15
def devideExcitationAndVocal(y=None):
    l = len(y)
    if l != 512:
        print(l)
    y = y * np.hanning(l)  # apply a Hanning window
    ar = librosa.lpc(y, order=12)
    ff = np.fft.fft(ar, l) ** (-1)  # all-pole vocal-tract spectrum 1/A(e^jw)
    est_x = scipy.signal.lfilter(np.hstack([[0], -ar[1:]]), [1], y)
    ERR = np.fft.fft(y - est_x)  # excitation (residual) spectrum
    G = np.mean((y - est_x)**2)**0.5  # gain: RMS of the residual

    # return np.log(np.abs(ff[:l//2])+1e-8), np.log(np.abs(ERR[:l//2])+1e-8)
    return np.abs(ff[:l // 2]) * G, np.abs(ERR[:l // 2])
Example #16
def findFormantes(datos):
  datos = np.asfortranarray(datos)
  A = librosa.lpc(datos, order=16)
  raices = np.roots(A)  # formants!
  formantes = []
  for k in raices:
    if k.imag > 0:
      w = np.arctan2(k.imag, k.real)
      Fk = w * (rate / (2 * np.pi))  # 'rate' is a module-level sampling rate
      Bw = (-1 / 2) * (rate / (2 * np.pi)) * np.log((k.real**2 + k.imag**2)**(1 / 2))
      if Fk > 90 and Bw < 450:
        formantes.append(Fk)

  return np.sort(formantes)
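A hedged usage sketch; note that `rate` inside the function above is a module-level global, so it is set explicitly here, and the test frequencies are illustrative:

import numpy as np
import librosa

rate = 16000  # the function reads this global as the sampling rate
t = np.arange(rate) / rate
# Two strong resonances as stand-ins for formants.
datos = np.sin(2 * np.pi * 500 * t) + np.sin(2 * np.pi * 1500 * t)
datos += 0.01 * np.random.randn(rate)  # a small noise floor stabilizes the fit
print(findFormantes(datos))  # candidate formant frequencies in Hz, sorted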
Example #17
def feature_extraction(file_name):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        result = np.array([])

        # librosa.lpc returns a 1-D vector, so averaging it would collapse
        # the coefficients to a single scalar; keep the vector instead.
        lpc = librosa.lpc(X, order=16)
        result = np.hstack((result, lpc))

        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T,
                        axis=0)
        result = np.hstack((result, mfccs))

    return result
Example #18
def extract_feature(file_name, mfcc, lpc, mel):
    X, sample_rate = librosa.load(file_name)
    result = np.array([])
    # The three feature flags are independent, so they must not be nested
    # under the mfcc flag.
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13).T, axis=0)
        result = np.hstack((result, mfccs))
    if lpc:
        lpc = librosa.lpc(X, order=16)
        result = np.hstack((result, lpc))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    return result
Example #19
def get_formants(x, lp_order, nr_formants):

    # compute lp coefficients
    a = librosa.lpc(x, order=lp_order)

    #get roots from lp coefficients
    rts = np.roots(a)
    rts = [r for r in rts if np.imag(r) >= 0]

    #get angles
    angz = np.arctan2(np.imag(rts), np.real(rts))

    # get formant frequencies (fs_targ is assumed to be a module-level rate)
    formants = sorted(angz * (fs_targ / (2 * math.pi)))

    return formants[0:nr_formants]
Example #20
def create_lpspectrum(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)

    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    plt.figure()
    plt.grid()
    plt.magnitude_spectrum(y_result, Fs=sampling_rate, color="blue")
    plt.title('LP Spectrum')
    plt.grid(color='grey', linestyle='--', linewidth=0.5)
    plt.savefig('static/images/lpspectrum-wav' + file + '-order' + str(order) +
                '.png')
    plt.close()
    return plt
Example #21
def lpc(data, window=256, overlap=128, order=6):

    startindex = 0
    endindex = window

    features = []

    while endindex <= len(data):

        y = data[startindex:endindex]
        coeffs = librosa.lpc(y, order=order)  # avoid shadowing the enclosing lpc()
        features.append(coeffs)

        startindex = endindex - overlap
        endindex = startindex + window

    return features
Example #22
def lp_residual(file, order):

    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)

    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    output_file("lpresidual-wav" + file + "-order" + str(order) + ".html")
    p = figure(plot_width=550,
               plot_height=300,
               x_axis_label='Time',
               y_axis_label="Magnitude")
    p.title = Title(text='LP Residual')
    timefilter = [i for i in range(len(audio))]
    p.line(timefilter, audio - y_result, line_width=2)

    show(p)
Example #23
def create_lpresidual(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)

    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    plt.figure()
    plt.grid()
    plt.plot(audio - y_result)
    plt.xlabel("Time")
    plt.ylabel("Magnitude")
    plt.title('LP Residual')
    plt.grid(color='grey', linestyle='--', linewidth=0.5)
    plt.savefig('static/images/lpresidual-wav' + file + '-order' + str(order) +
                '.png')
    plt.close()
    return plt
Example #24
    def get_lpc_tonality(self):
        """Ratio of zeroth to fourth LPC coefficient.

        Metric for wheeze detection based on the observation by Oletic et al.
        that linear predictive coding (LPC) estimation error tends to fall off
        more rapidly (i.e., in fewer coefficients) with tonal sounds than with
        non-tonal sounds. More tonal sounds will have a higher value of this ratio.

        **due to instability issues in librosa.lpc, this is currently not
        returned in self.get_features()**

        Oletic, Dinko, Bruno Arsenali, and Vedran Bilas. 2012. “Towards continuous
            wheeze detection body sensor node as a core of asthma monitoring system.”
            In Wireless Mobile Communication and Healthcare, 83:165–72.
            https://doi.org/10.1007/978-3-642-29734-2_23.

        """

        lpc_ = librosa.lpc(self.x, order=4)
        return np.abs(lpc_[0] / lpc_[4])
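A standalone illustration of the ratio (not the class above); the signals are synthetic and, per the stability caveat in the docstring, the values should be read as indicative only:

import numpy as np
import librosa

def tonality_ratio(x):
    a = librosa.lpc(x, order=4)
    return np.abs(a[0] / a[4])  # a[0] is always 1.0 by convention

sr = 8000
t = np.arange(sr) / sr
tone = np.sin(2 * np.pi * 440 * t) + 0.01 * np.random.randn(sr)
noise = np.random.randn(sr)
print(tonality_ratio(tone), tonality_ratio(noise))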
Example #25
def formatSignal(file, frame_length=320, LPCsize=16):
    wave, _ = wavelib.load(file, sr=None)
    signalLen = wave.shape[0]
    sizeof = signalLen // frame_length

    # Pre-emphasis: y[n] = x[n] - 0.85 * x[n-1]
    wave[1:] = -0.85 * wave[:-1] + wave[1:]
    wave = wave[:sizeof * frame_length]
    wave = wave.reshape([sizeof, frame_length])

    input_data = np.hstack((wave[:-1, :], wave[1:, :]))
    baseline = wave[1:, :]
    N, _ = baseline.shape
    LPC = np.zeros((N, LPCsize))
    for i in range(N):
        a = wavelib.lpc(baseline[i], order=LPCsize)
        LPC[i, :] = -1 * a[1:]

    Initial_state = input_data[:, frame_length - LPCsize:frame_length]

    return input_data, baseline, LPC, Initial_state
Example #26
def formants(signal, width, step, fs, nb=4):
    """
    formants(signal, width, step, fs, nb=4)
        Compute the formants for all frames of a signal.

        Parameters
        ----------
        signal : ndarray
        width : float
            The width of frame in ms.
        step : float
            The step between two frames in ms.
        fs : float
            The sampling frequency.
        nb : integer
            The number of formants considered for each frame.

        Returns
        -------
        formants : ndarray
            A 2-dim array with the formants of each frame.
    """
    frames = split(signal, width, step, fs)
    b, a = [1, -0.67], [1]
    roots = []
    for frame in frames:
        filtered_frame = sgl.lfilter(b, a, frame)
        hamming_win = sgl.windows.hamming(filtered_frame.size)
        filtered_frame *= hamming_win  # apply hamming window on the frame
        lpcs = lpc(filtered_frame, order=9)
        root = np.roots(lpcs)
        frame_res = root[root.imag > 0][:nb]
        if len(frame_res) < nb:
            frame_res = np.concatenate(
                (frame_res, [0] * (nb - len(frame_res))))
            frame_res = np.sort(frame_res)
        roots.append(frame_res)

    angles = np.angle(roots)
    freq = angles * (fs / (2 * np.pi))
    return np.sort(freq, axis=1)
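A hedged usage sketch; `split` is assumed to be a framing helper defined elsewhere, so a minimal stand-in is included (width and step in ms, matching the docstring):

import numpy as np
import scipy.signal as sgl
from librosa import lpc

def split(signal, width, step, fs):
    # Minimal stand-in: frame the signal into width-ms windows every step ms.
    w, s = int(width * fs / 1000), int(step * fs / 1000)
    return [signal[i:i + w] for i in range(0, len(signal) - w + 1, s)]

fs = 16000
t = np.arange(fs) / fs
sig = np.sin(2 * np.pi * 800 * t) + 0.01 * np.random.randn(fs)
print(formants(sig, width=25, step=10, fs=fs)[0])  # formants of the first frame, Hz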
Example #27
def compute_lsfs(audio,
                 expected_len=100,
                 n_lsfs=20,
                 sample_rate=16000,
                 frame_size=512):
    """Scaled Line spectral frequencies.

  Args:
    audio: Numpy ndarray or tensor. Shape [batch_size, audio_length] or
      [batch_size,].
    expected_len: Expected feature series length. This takes precedence over
        frame_size, following the DDSP authors' use of the
        pad_or_trim_to_expected_length() function, which imposes a length that
        may not match the one implied by parameters like frame_size.
    n_lsfs: Number of LSF values to return
    sample_rate: Audio sample rate in Hz.

  Returns:
    Line spectral frequencies. Shape [batch_size, n_lsfs] or [n_lsfs,].
  """

    window_func = create_window(frame_size, name='hanning')

    LSFS = np.zeros((expected_len, n_lsfs))
    start_indexes = np.linspace(0,
                                audio.size - frame_size - 1,
                                num=expected_len,
                                endpoint=True).astype('int')

    for window_index in range(expected_len):
        #WINDOWING
        start_index = start_indexes[window_index]
        windowed_sig = np.multiply(audio[start_index:start_index + frame_size],
                                   window_func)

        #ANALYSIS: LSF parameter extraction
        a = librosa.lpc(windowed_sig, order=n_lsfs)
        lsfs = np.array(
            poly2lsf(a)) / np.pi  #division by PI will put lsfs in 0-1 range
        LSFS[window_index] = lsfs
    return LSFS
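A hedged usage sketch. `poly2lsf` is assumed to come from the `spectrum` package and `create_window` from the surrounding codebase, so a minimal stand-in window helper is included:

import numpy as np
import librosa
from spectrum import poly2lsf  # assumed dependency of the snippet above

def create_window(frame_size, name='hanning'):
    # Minimal stand-in for the helper assumed above.
    return np.hanning(frame_size)

audio = np.random.randn(16000).astype(np.float32)
lsfs = compute_lsfs(audio, expected_len=100, n_lsfs=20)
print(lsfs.shape)  # (100, 20), values scaled into [0, 1] by the division by pi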
Example #28
def compute_lpc_lsp(wav_file, label, feature_name, lsp_nums=[0, 1]):
    y, sr = librosa.load(wav_file)
    #rate,data=read(wav_file)
    plt.figure()
    snd = parselmouth.Sound(wav_file)
    spectrogram = snd.to_spectrogram()
    draw_spectrogram(spectrogram)
    plt.twinx()
    intensity = snd.to_intensity(time_step=0.025, minimum_pitch=75)
    time_stamps = intensity.xs()
    colors = ["r", "b"]
    for lsp_num in lsp_nums:
        lsp_values = []
        for i in range(len(time_stamps) - 1):
            curr_data = y[int(time_stamps[i] * sr):int((time_stamps[i] + 1) *
                                                       sr)]
            lpc_value = librosa.lpc(curr_data, order=8)
            lsp = lpc2lsp(lpc_value, otype=0)[lsp_num + 1]
            lsp_values.append(lsp / (np.pi) * sr)
        plt.ylabel("LSP frequency [Hz]")
        plt.plot(time_stamps[:-1],lsp_values,'o',markersize=5,\
            color="w")
        plt.plot(time_stamps[:-1],
                 lsp_values,
                 'o',
                 markersize=2,
                 color=colors[lsp_num],
                 label="LSPfreuqnecy[{}]".format(lsp_num))
        plt.xlim([snd.xmin, snd.xmax])
        plt.ylim([0, 5000])
    plt.legend(loc="upper right")
    plt.savefig("/home/jialu/voice_quality_plots/v2/lspFrequency/" + label +
                "_" + feature_name + ".png")
    plt.close()
Example #29
def lp_spectrum(file, order):

    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)

    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    n = len(audio)
    T = 1 / sampling_rate
    yf = np.fft.fft(y_result)
    xf = np.linspace(0.0, 1.0 / (2.0 * T), n // 2)

    output_file("lpspectrum-wav" + file + "-order" + str(order) + ".html")
    p = figure(plot_width=550,
               plot_height=300,
               x_axis_label='Frequency',
               y_axis_label="Amplitude")
    p.title = Title(text='LP Spectrum')
    p.line(xf, 2.0 / n * np.abs(yf[:n // 2]), line_width=2)

    show(p)
Example #30
File: utilsTorch.py Project: mendo88/AI
def get_formants(x, Fs, nformants):

    # Originally read from file:
    # Fs, x = wv.read(file_path)

    # Get Hamming window.
    N = len(x)
    w = hamming(N)

    # Apply window and high pass filter.
    x1 = x * w
    x1 = lfilter([1], [1., 0.63], x1)

    # Get LPC; rule of thumb for formant analysis: order = 2 + Fs/1000.
    ncoeff = 2 + (Fs / 1000)
    A = lpc(x1, order=int(ncoeff))

    # Get roots.
    rts = np.roots(A)
    rts = [r for r in rts if np.imag(r) >= 0]

    # Get angles.
    angz = np.arctan2(np.imag(rts), np.real(rts))

    # Get frequencies.
    frqs = sorted(angz * (Fs / (2 * math.pi)))

    return frqs[:nformants]