Ejemplo n.º 1
0
def obtain_fft_in_db(data, n_fft):
    """Return the normalised FFT magnitude of ``data`` in decibels, offset by +120.

    Args:
        data: Signal handed to ``fft``.
        n_fft: FFT size; the transform is taken with ``n = n_fft // 2 + 1``.

    Returns:
        The decibel-scaled magnitude with a constant 120 added.
    """
    spectrum = fft(data, n=n_fft // 2 + 1)
    # Only the magnitude is used; the phase component is discarded.
    magnitude, _ = librosa.core.magphase(spectrum)
    magnitude_db = amplitude_to_db(librosa.util.normalize(magnitude))
    return magnitude_db + 120
Ejemplo n.º 2
0
def save_spectrogram(data, output_file):
    """Draw ``data`` as a dB-scaled constant-Q spectrogram and save it.

    Args:
        data: Amplitude values converted to decibels relative to their maximum.
        output_file: Destination passed to ``plt.savefig``.
    """
    decibels = amplitude_to_db(data, ref=np.max)
    display_options = {
        'sr': TARGET_SAMPLE_RATE,
        'x_axis': 'time',
        'y_axis': 'cqt_note',
        'hop_length': HOP_LENGTH,
        'bins_per_octave': BINS_PER_OCTAVE,
    }
    specshow(decibels, **display_options)

    # Decorate the current figure, then write it out.
    plt.colorbar(format='%+2.0f dB')
    plt.title("Constant-Q Transform")
    plt.tight_layout()
    plt.savefig(output_file)
Ejemplo n.º 3
0
def plot_spectrogram(file):
    """Load an audio file and display its STFT spectrogram in decibels.

    Args:
        file: Path of the recording to load. When it is falsy (e.g. ``None``
            or an empty string) the module-level ``SOUNDFILE`` is used, which
            is what the previous implementation always loaded.
    """
    # The argument used to be ignored in favour of SOUNDFILE; honour it now.
    audio, sr = lr.load(file or SOUNDFILE, sr=RATE)
    spec = stft(audio, hop_length=FRAME, n_fft=2**7)
    spec_db = amplitude_to_db(np.abs(spec))

    fig, ax = plt.subplots(figsize=(9, 3))
    specshow(spec_db, sr=sr, x_axis='time', y_axis='hz', hop_length=FRAME, ax=ax, cmap='magma')
    fig.suptitle('Spectrogram of the Recording')
    ax.set_ylabel('Frequency in Hz')
    ax.set_xlabel('Time in min:s')
    plt.tight_layout()
    plt.show()
Ejemplo n.º 4
0
def generate_spectrogram(samples, rate, opt):
    """Plot a log-frequency spectrogram of ``samples`` and return it base64-encoded.

    The figure is written to ``specplot_<timestamp>.png`` in the current
    working directory, read back, and encoded.

    Args:
        samples: Audio signal passed to ``lc.stft``.
        rate: Accepted for interface compatibility; not used by this function.
        opt: Title selector. 0 -> 'Spectrogram', 1 -> 'Harmonic Components
            Spectrogram', 2 -> 'Percussive Components Spectrogram'. Any other
            value leaves the figure untitled.

    Returns:
        str: Base64 text of the saved PNG file.
    """
    fig = plt.figure(figsize=(10, 5))
    try:
        if opt == 0:
            plt.title('Spectrogram')
        elif opt == 1:
            plt.title('Harmonic Components Spectrogram')
        elif opt == 2:
            plt.title('Percussive Components Spectrogram')
        ld.specshow(lc.amplitude_to_db(np.abs(lc.stft(samples)), ref=np.max),
                    y_axis='log',
                    x_axis='time')
        filename = "specplot_" + str(datetime.datetime.now().timestamp()) + ".png"
        plt.savefig(filename)
    finally:
        # Without this every call leaves another figure open in pyplot's registry.
        plt.close(fig)

    with open(filename, 'rb') as f:
        encoding = base64.b64encode(f.read()).decode('utf-8')
    return encoding
 def plot_log_power_specgram(self, sound_names, raw_sounds):
     """Plot the log-power spectrogram of each sound in its own subplot row.

     Args:
         sound_names: Labels; each one is title-cased and used as a subplot title.
         raw_sounds: Audio signals, paired with ``sound_names`` by position.

     Subplots are placed on a 10-row, 1-column grid starting at index ``self.i``.
     """
     plt.figure(figsize=self.figsize, dpi=self.dpi)
     for i, (n, f) in enumerate(zip(sound_names, raw_sounds), start=self.i):
         plt.subplot(10, 1, i)
         # |STFT|**2 is a power spectrogram, so the matching conversion is
         # power_to_db; amplitude_to_db would square the values a second time.
         # This is the same conversion as the removed
         # librosa.logamplitude(..., ref_power=np.max) call this line replaced.
         # NOTE(review): assumes `core` is librosa.core -- confirm the import.
         D = core.power_to_db(np.abs(librosa.stft(f))**2, ref=np.max)
         display.specshow(D, x_axis='time', y_axis='log')
         plt.title(n.title())
     plt.suptitle("Figure 3: Log power spectrogram",
                  x=self.x,
                  y=self.y,
                  fontsize=self.fontsize)
     plt.show()
Ejemplo n.º 6
0
def build_X_y():
    """
    Build the input tensor X and one-hot targets y for the CNN.

    If ``check_data()`` returns a truthy object, its stored ``data`` pair is
    returned directly. Otherwise every file named in ``df['fname']`` is read
    from ``clean/``, converted to a dB-scaled mel-spectrogram, optionally
    filtered, and the result is stored on ``config`` and pickled to
    ``config.p_path``.
    """
    cached = check_data()
    if cached:
        return cached.data[0], cached.data[1]  # X, y loaded from the pickle

    features, labels = [], []
    for _, file in tqdm(enumerate(df['fname'])):
        _, signal = wavfile.read('clean/' + file)
        mel = melspectrogram(y=signal,
                             sr=config.sample_rate,
                             n_mels=config.n_mels,
                             n_fft=config.n_fft,
                             hop_length=config.hop_length,
                             window=config.window)

        S = amplitude_to_db(mel)
        # Noise reduction: blend the two lowest bands towards the overall mean.
        # The second mean is taken after the first band has been updated.
        S[0] = (2 * S.mean() + S[0]) / 3
        S[1] = (S.mean() + 2 * S[1]) / 3

        # Randomly pick a filter: 1 -> median, 2 -> Wiener, 0 or 3 -> none.
        choice = random.randint(0, 3)
        if choice == 1:
            S = medfilt2d(S)
        elif choice == 2:
            S = wiener(S)

        features.append(S)
        labels.append(work_status(file))

    X = np.array(features)
    print(X.shape)
    # Append a trailing channel axis of size 1.
    X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    y = to_categorical(np.array(labels), num_classes=config.num_classes)

    config.data = (X, y)
    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle, protocol=2)
    return X, y
def compute_melgram(audio_path,
                    SR=12000,
                    N_FFT=512,
                    N_MELS=96,
                    HOP_LEN=256,
                    DURA=29.12):  # compute only center portion of the track
    """
    Load ``audio_path`` and return its dB-scaled mel-spectrogram.

    The signal is loaded at ``SR`` and forced to ``int(DURA * SR)`` samples:
    shorter signals are zero-padded at the end, longer ones are cropped
    around their centre. Two leading singleton axes are prepended to the
    result before it is returned.
    """
    print('loading...', audio_path)
    src, _ = librosa.load(audio_path, sr=SR)  # load whole signal
    target_len = int(DURA * SR)
    actual_len = src.shape[0]

    if actual_len < target_len:
        # Too short: pad with trailing zeros.
        padding = np.zeros((target_len - actual_len, ))
        src = np.hstack((src, padding))
    elif actual_len > target_len:
        # Too long: keep the central portion.
        offset = int((actual_len - target_len) / 2)
        src = src[offset:offset + target_len]

    # Open question from the original author: melspectrogram output may be a
    # power quantity, in which case power_to_db might be the better conversion.
    melgram = feature.melspectrogram(y=src,
                                     sr=SR,
                                     hop_length=HOP_LEN,
                                     n_fft=N_FFT,
                                     n_mels=N_MELS)
    melgram_db = core.amplitude_to_db(melgram, ref=1.0)
    return melgram_db[np.newaxis, np.newaxis, :]
def extract_segments(clip, filename, sets, label, label_name, frames):
    """Cut ``clip`` into half-overlapping windows and return their log-mel features.

    Each window is converted to a mel-spectrogram, scaled to decibels and
    flattened into one row. Windows whose mean level is not above -70 dB are
    dropped.

    Args:
        clip: Audio samples; peak-normalised before processing.
        filename, sets, label, label_name: Metadata copied into every row.
        frames: Frames per segment plus one (``frames - 1`` frames are used).

    Returns:
        pandas.DataFrame with one row per kept segment (metadata columns
        followed by ``logspec_b<band>_f<frame>`` columns). An empty DataFrame
        is returned when no segment qualifies.

    Raises:
        ValueError: If ``frames`` yields a non-positive step size.
    """
    FRAMES_PER_SEGMENT = frames - 1  # 41 frames ~= 950 ms
    WINDOW_SIZE = 512 * FRAMES_PER_SEGMENT  # 23 ms per frame
    STEP_SIZE = 512 * FRAMES_PER_SEGMENT // 2  # 512 * 20 = 10240
    BANDS = 60

    if STEP_SIZE <= 0:
        # A zero step previously made the windowing loop spin forever.
        raise ValueError("frames must be at least 2")

    # Peak-normalise; a completely silent clip is left untouched instead of
    # being multiplied by 1 / 0.
    peak = np.max(np.abs(clip))
    if peak > 0:
        clip = clip * (1 / peak)

    segments = []
    for begin in range(0, len(clip) - WINDOW_SIZE + 1, STEP_SIZE):
        end = begin + WINDOW_SIZE
        # Pass the audio by keyword: recent librosa releases reject it positionally.
        melspec = melspectrogram(y=clip[begin:end],
                                 sr=22050,
                                 n_fft=1024,
                                 hop_length=512,
                                 n_mels=BANDS)
        logspec = amplitude_to_db(melspec)
        # Flatten frame by frame into a single row.
        logspec = logspec.T.flatten()[:, np.newaxis].T
        logspec = pd.DataFrame(
            data=logspec,
            dtype='float32',
            index=[0],
            columns=['logspec_b{}_f{}'.format(i % BANDS, i // BANDS)
                     for i in range(np.shape(logspec)[1])])

        if np.mean(logspec.values) > -70.0:
            segment_meta = pd.DataFrame(
                {
                    'filename': filename,
                    'sets': sets,
                    'label': label,
                    'label_name': label_name,
                    's_begin': begin,
                    's_end': end
                },
                index=[0])
            segments.append(pd.concat((segment_meta, logspec), axis=1))

    if not segments:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(segments, ignore_index=True)
Ejemplo n.º 9
0
def spectrograms_of_heartbeat_audio(audio, time, sfreq):
    """Plot the raw audio above its STFT spectrogram and return the STFT.

    Args:
        audio: Audio samples.
        time: Time axis plotted against ``audio``.
        sfreq: Sampling rate passed to ``specshow``.

    Returns:
        The STFT as returned by ``stft``, unchanged.
    """
    # Prepare the STFT
    HOP_LENGTH = 2**4
    spec = stft(audio, hop_length=HOP_LENGTH, n_fft=2**7)

    # Convert the magnitude into decibels; the dB conversion is meant for
    # magnitudes, so take np.abs explicitly instead of passing complex values.
    spec_db = amplitude_to_db(np.abs(spec))

    # Compare the raw audio to the spectrogram of the audio
    fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
    axs[0].plot(time, audio)
    # Target the second row explicitly rather than relying on pyplot's
    # "current axes" state.
    specshow(spec_db,
             sr=sfreq,
             x_axis='time',
             y_axis='hz',
             hop_length=HOP_LENGTH,
             ax=axs[1])
    plt.show()

    return spec
Ejemplo n.º 10
0
 def __prepateInput(self, input_signal, sampling_rate):
     """Turn a signal into dB magnitudes and phases of its complex spectrogram.

     The signal is resampled first when ``sampling_rate`` differs from the
     configured input rate.

     Returns:
         list: ``[freq, time, db_values, phase]`` where ``db_values`` is the
         transposed dB magnitude with a singleton axis inserted at position 1.
     """
     needs_resampling = sampling_rate != self.__INPUT_SAMPLING_RATE
     if needs_resampling:
         input_signal = self.__resample(input_signal, sampling_rate)

     analysis_window = get_window(self.__WINDOW, self.__N_SAMPLES_WINDOW)
     freq, time, complex_spec = spectrogram(
         input_signal,
         fs=self.__INPUT_SAMPLING_RATE,
         window=analysis_window,
         noverlap=self.__N_SAMPLES_OVERLAP,
         nfft=self.__N_SAMPLES_WINDOW,
         return_onesided=True,
         scaling='spectrum',
         axis=-1,
         mode='complex')

     magnitude_db = amplitude_to_db(np.abs(complex_spec))
     db_values = magnitude_db.T[:, np.newaxis, :]
     return [freq, time, db_values, np.angle(complex_spec)]
Ejemplo n.º 11
0
def engineering_spectral_features(spec, times_spec):
    """Overlay spectral centroid and bandwidth on a dB spectrogram.

    Args:
        spec: Spectrogram; its magnitude is taken, so complex STFT output is
            accepted as well as an already-real magnitude.
        times_spec: Time value for every spectrogram column, used as the
            x coordinate of the overlay.
    """
    # The centroid and bandwidth are computed from magnitudes. ``spec.real``
    # would keep negative values for complex input, so take the absolute
    # value instead.
    spec = np.abs(spec).astype("float32")
    bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
    centroids = lr.feature.spectral_centroid(S=spec)[0]

    # Convert spectrogram to decibels for visualization
    spec_db = amplitude_to_db(spec)

    # Display these features on top of the spectrogram
    fig, ax = plt.subplots(figsize=(10, 5))
    HOP_LENGTH = 2**4
    # Draw into ``ax`` and keep using it: depending on the librosa release,
    # specshow returns either the Axes or a QuadMesh, so its return value is
    # not a reliable handle for plotting.
    specshow(spec_db, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH, ax=ax)
    ax.plot(times_spec, centroids)
    ax.fill_between(times_spec,
                    centroids - bandwidths / 2,
                    centroids + bandwidths / 2,
                    alpha=.5)
    ax.set(ylim=[None, 6000])
    plt.show()
Ejemplo n.º 12
0
def plot_esc50_spectrograms():
    """Plot waveform and spectrogram for one random rooster, crow and hen clip.

    Clip names come from ``meta/esc50.csv`` under ``./dataset/ESC-50-master/``;
    the same random position is used in each category's file list.
    """
    esc50dir = './dataset/ESC-50-master/'
    esc50audio = esc50dir + 'audio/'
    esc50meta = esc50dir + 'meta/'

    meta = pd.read_csv(esc50meta + 'esc50.csv')
    categories = [('rooster', 'a Rooster'), ('crow', 'a Crow'), ('hen', 'a Hen')]
    clips = [list(meta[meta['category'] == category]['filename'])
             for category, _ in categories]

    # Draw the index from the number of available clips. The previous bound,
    # meta.shape[1], is the number of CSV columns and so only ever reached the
    # first few clips. The upper bound n - 1 is valid whether ``randint``
    # treats it as inclusive or exclusive.
    n_choices = min(len(names) for names in clips)
    i = randint(0, n_choices - 1) if n_choices > 1 else 0

    fig, axs = plt.subplots(3, 2, figsize=(10, 8), sharex=True)
    fig.suptitle('Comparison between different Classes')

    for j, ((_, display_name), names) in enumerate(zip(categories, clips)):
        audio, sr = lr.load(esc50audio + names[i], sr=RATE)
        t = np.arange(0, len(audio)) / sr

        axs[j][0].plot(t, audio)
        axs[j][0].set_title('Waveform of ' + display_name)
        axs[j][0].set_xlabel('Time in s')
        axs[j][0].set_ylabel('Amplitude')

        spec = stft(audio, hop_length=FRAME, n_fft=2**7)
        spec_db = amplitude_to_db(np.abs(spec))
        specshow(spec_db, sr=sr, x_axis='time', y_axis='hz', hop_length=FRAME, ax=axs[j][1], cmap='magma')

        axs[j][1].set_title('Spectrogram of ' + display_name)
        axs[j][1].set_xlabel('Time in s')
        axs[j][1].set_ylabel('Frequency in Hz')

    plt.tight_layout()
    plt.show()
Ejemplo n.º 13
0
def extract_segments(clip,frames=41):
    """Return the flattened log-mel features of the first window of ``clip``.

    Args:
        clip: Audio samples; peak-normalised before processing.
        frames: Frames per segment plus one (``frames - 1`` frames are used).

    Returns:
        A one-row pandas.DataFrame with ``logspec_b<band>_f<frame>`` columns,
        or the integer ``0`` when the clip is shorter than one window.
    """
    FRAMES_PER_SEGMENT = frames - 1  # 41 frames ~= 950 ms
    WINDOW_SIZE = 512 * FRAMES_PER_SEGMENT  # 23 ms per frame
    BANDS = 60

    # Peak-normalise; a completely silent clip is left untouched instead of
    # being multiplied by 1 / 0.
    peak = np.max(np.abs(clip))
    if peak > 0:
        clip = clip * (1 / peak)

    signal = clip[:WINDOW_SIZE]
    if len(signal) != WINDOW_SIZE:
        # Kept for backward compatibility: callers receive 0 for short clips.
        return 0

    # Pass the audio by keyword: recent librosa releases reject it positionally.
    melspec = melspectrogram(y=signal, sr=22050, n_fft=1024, hop_length=512, n_mels=BANDS)
    logspec = amplitude_to_db(melspec)
    # Flatten frame by frame into a single row.
    logspec = logspec.T.flatten()[:, np.newaxis].T
    return pd.DataFrame(
        data=logspec, dtype='float32', index=[0],
        columns=['logspec_b{}_f{}'.format(i % BANDS, i // BANDS)
                 for i in range(np.shape(logspec)[1])]
    )
## Engineering spectral features

import librosa as lr

# Calculate the spectral centroid and bandwidth for the spectrogram.
# NOTE(review): `spec` is not defined in this snippet; presumably it is a
# magnitude spectrogram created by earlier code -- confirm.
# Index [0] keeps the first row of each result.
bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
centroids = lr.feature.spectral_centroid(S=spec)[0]
________________________________________________________________

from librosa.core import amplitude_to_db
from librosa.display import specshow

# Convert spectrogram to decibels for visualization
spec_db = amplitude_to_db(spec)

# Display these features on top of the spectrogram.
# Draw into the Axes created here and keep using it: depending on the librosa
# release, specshow returns either the Axes or a QuadMesh, so rebinding ``ax``
# to its return value is not reliable.
fig, ax = plt.subplots(figsize=(10, 5))
specshow(spec_db, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH, ax=ax)
ax.plot(times_spec, centroids)
ax.fill_between(times_spec,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=.5)
ax.set(ylim=[None, 6000])
plt.show()
Ejemplo n.º 15
0
# FFT
import numpy as np
import librosa as lr
from librosa.core import stft, amplitude_to_db
from librosa.display import specshow

HOP_LENGTH = 2**4
SIZE_WINDOW = 2**7

# NOTE(review): `audio`, `sfreq`, `spec` and `times_spec` are not defined in
# this snippet; they are expected to come from earlier code -- confirm.
audio_spec = stft(audio, hop_length=HOP_LENGTH, n_fft=SIZE_WINDOW)
# The dB conversion is meant for magnitudes, so drop the phase explicitly.
spec_db = amplitude_to_db(np.abs(audio_spec))

specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH)

# spectral centroid and bandwidth
bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
centroids = lr.feature.spectral_centroid(S=spec)[0]

drawn = specshow(spec,
                 sr=sfreq,
                 x_axis='time',
                 y_axis='hz',
                 hop_length=HOP_LENGTH)
# Depending on the librosa release, specshow returns either the Axes or a
# QuadMesh; resolve to the Axes in both cases.
ax = getattr(drawn, 'axes', drawn)
# Both overlays share one time axis (this line previously used `time_spec`).
ax.plot(times_spec, centroids)
ax.fill_between(times_spec,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=0.5)

centroids_all = []
bandwidths_all = []
Ejemplo n.º 16
0
# FFT of the first channel.
# NOTE(review): `channel1` and `fft` are not defined in this snippet -- confirm.
fourier = fft.fft(channel1)
#print(fourier)
"""plt.figure()
plt.plot(fourier, alpha=0.9, color='blue')
plt.xlabel('k')
plt.ylabel('Amplitude')
plt.show()
"""

# CQT
# Load the wav file with librosa
x, sr = librosa.load(
    "test.wav", sr=44100,
    mono=True)  # mono=True converts the audio to mono (to do)
# Constant-Q transform with 36 bins per octave, then magnitude in decibels
cqt = librosa.cqt(x, sr=sr, bins_per_octave=36)
log_cqt = librosa.amplitude_to_db(np.abs(cqt))

# Spectrogram FFT
"""
plt.figure(2, figsize=(8,6))
plt.subplot(211)
Pxx, freqs, bins, im = plt.specgram(channel1, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
cbar.set_label('Intensity dB')
plt.subplot(212)
Pxx, freqs, bins, im = plt.specgram(channel2, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
Ejemplo n.º 17
0
def PlotSpec(S):
    """Show ``S`` in decibels (relative to its maximum) on a linear frequency axis.

    The sample rate given to ``specshow`` is ``SR`` scaled by the ratio of
    ``2 * S.shape[0]`` to ``FFTSIZE``.
    """
    spec_db = amplitude_to_db(S, ref=np.max)
    fftsize = S.shape[0] * 2
    display_rate = SR * fftsize / FFTSIZE
    specshow(spec_db, sr=display_rate, y_axis="linear")
Ejemplo n.º 18
0
import librosa.core as lib
import numpy as np
from librosa.display import specshow
from librosa.core import amplitude_to_db
from librosa.feature import chroma_stft
import matplotlib.pyplot as plt

# In[6]:

# Load the vocal track; `sr` is the sampling rate returned by the loader.
y, sr = lib.load('./data/13_LeadVox.wav')

# In[18]:

# Short-time Fourier transform of the whole track.
stft = lib.stft(y)
# Show the magnitude in decibels relative to its maximum on a log-frequency axis.
specshow(amplitude_to_db(np.abs(stft), ref=np.max),
         x_axis='time',
         y_axis='log')
plt.show()

# In[35]:

# Pitch tracking; the call returns a (pitches, magnitudes) pair.
pitches, magnitudes = lib.piptrack(y=y, sr=sr)

# In[49]:

# In[32]:

import librosa.onset
#odf = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
Ejemplo n.º 19
0
def nlfc(data_orig_mod,
         freq,
         n,
         db_ref,
         start_freq,
         compression_ratio,
         compression_frequency,
         compression_nfft,
         compress=True):
    """Apply non-linear frequency compression to a spectrogram, plotting each stage.

    The input is split into a low-pass and a high-pass part at ``start_freq``.
    The high-pass part is taken to the time domain, resampled to
    ``compression_frequency``, has its frequency bins remapped, and is brought
    back to the original rate and re-analysed.

    NOTE(review): ``fs``, ``center`` and ``n_fft`` are read from module scope;
    they are not parameters of this function.

    Args:
        data_orig_mod: Spectrogram to process (passed to ``apply_filter``).
        freq: Array of frequencies of the spectrogram rows.
        n: Length in samples used for the inverse STFT.
        db_ref: When not ``None`` a Chebyshev-II filter is used, the data is
            plotted without dB conversion, and the output is passed through
            ``db_to_amplitude(..., ref=db_ref)``.
        start_freq: Crossover frequency; bins at or above the nearest entry
            of ``freq`` are compressed.
        compression_ratio: Exponent of the mapping
            ``f_out = start_freq**(1 - ratio) * f**ratio``.
        compression_frequency: Sample rate used while remapping.
        compression_nfft: FFT size used to derive the remapping bins.
        compress: If true, return the compressed high band; otherwise return
            the unmodified high band plus the low band.

    Returns:
        The resulting spectrogram (``data_stacked``).
    """

    def nearest_bin(bins, target):
        # Index of the first entry of ``bins`` that is closest to ``target``.
        return np.argmin(np.abs(bins - target))

    def show(title, spectrum):
        # Plot one processing stage in a new figure.
        plt.figure(get_fig_nums() + 1)
        plt.title(title)
        specshow(amplitude_to_db(spectrum) if db_ref is None else spectrum,
                 x_axis='time',
                 y_axis='linear')
        plt.colorbar()

    order = 32
    ftype = 'butter'
    rp = None
    rs = None
    if db_ref is not None:
        ftype = 'cheby2'
        rp = 0.1
        rs = 80

    # Lowpass filter
    _, h_lp = design_filter(order=order,
                            cutoff=[start_freq],
                            fs=fs,
                            freqs=freq,
                            ftype=ftype,
                            rp=rp,
                            rs=rs)
    data_low_pass = apply_filter(data_orig_mod, h_lp)
    show('low pass', data_low_pass)

    # Highpass filter
    _, h_hp = design_filter(order=order,
                            cutoff=[start_freq],
                            btype='highpass',
                            fs=fs,
                            freqs=freq,
                            ftype=ftype,
                            rp=rp,
                            rs=rs)
    data_high_pass = apply_filter(data_orig_mod, h_hp)
    show('high pass before', data_high_pass)

    # Convert signals back to time domain
    data_high_pass_td = istft(data_high_pass, center=center, length=n)

    # Resample time-domain signal. Rates and sizes are passed by keyword
    # because recent librosa releases reject them positionally.
    data_high_pass_td = librosa.core.resample(data_high_pass_td,
                                              orig_sr=fs,
                                              target_sr=compression_frequency)
    n2 = len(data_high_pass_td)

    resample_freqs = fft_frequencies(sr=compression_frequency,
                                     n_fft=compression_nfft)
    data_hp_padded = librosa.util.fix_length(data_high_pass_td,
                                             size=n2 + compression_nfft // 2)

    # Build the source-bin index for every output bin.
    crossover = freq[nearest_bin(freq, start_freq)]
    modulated_carrier_freqs = []

    # Below the crossover: keep the bin nearest to the original frequency.
    for f in freq[freq < crossover]:
        modulated_carrier_freqs.append(nearest_bin(resample_freqs, f))

    # At or above the crossover: use the bin nearest to the compressed frequency.
    for f in freq[freq >= crossover]:
        f_out = start_freq**(1 - compression_ratio) * f**(compression_ratio)
        modulated_carrier_freqs.append(nearest_bin(resample_freqs, f_out))

    # NOTE(review): the FFT size here is a literal 512, not compression_nfft.
    data_hp_resampled = stft(data_hp_padded, n_fft=512, center=center)

    # ``np.complex`` no longer exists in current NumPy; the builtin ``complex``
    # selects the same dtype.
    modulated_carrier = np.zeros(data_hp_resampled.T.shape, dtype=complex)
    # Gather the selected bins for every time frame at once.
    modulated_carrier[:] = data_hp_resampled.T[:, modulated_carrier_freqs]
    modulated_carrier = modulated_carrier.T
    modulated_carrier_td = istft(modulated_carrier, center=center, length=n2)

    # Resample time-domain signal back to the original rate
    data_high_pass_td_new = librosa.core.resample(
        modulated_carrier_td,
        orig_sr=compression_frequency,
        target_sr=fs)

    # Pad the data since istft will drop any data in the last frame if samples are
    # less than n_fft.
    data_pad_new = librosa.util.fix_length(data_high_pass_td_new,
                                           size=n + n_fft // 2)

    data_high_pass_modulated = stft(data_pad_new, n_fft=n_fft, center=center)

    data_stacked = data_high_pass_modulated if compress else data_high_pass + data_low_pass
    if db_ref is not None:
        dmag, dphase = librosa.core.magphase(data_stacked)
        dmag = db_to_amplitude(dmag, ref=db_ref)
        data_stacked = dmag * dphase

    show('high pass after', data_high_pass_modulated)
    show('stacked', data_stacked)

    return data_stacked
Ejemplo n.º 20
0
def PlotTemplates(T):
    """Show the transpose of ``T`` in decibels on a linear frequency axis.

    The sample rate given to ``specshow`` is ``SR`` scaled by the ratio of
    ``2 * T.shape[1]`` to ``FFTSIZE``.
    """
    templates_db = amplitude_to_db(T.T)
    fftsize = T.shape[1] * 2
    display_rate = SR * fftsize / FFTSIZE
    specshow(templates_db, sr=display_rate, y_axis="linear")
Ejemplo n.º 21
0
def eq(data_db,
       eq_freqs,
       audiogram,
       sr,
       n_fft,
       db_ref,
       data_amp,
       phase,
       plot=False):
    """Equalise a spectrogram with a filterbank weighted by an audiogram.

    Args:
        data_db: Spectrogram to process; it is copied, not modified.
        eq_freqs: Centre frequencies for the filterbank.
        audiogram: Indexable; ``audiogram[1]`` is divided by 120 and passed
            to ``apply_filter`` as the scaling.
        sr: Sample rate.
        n_fft: FFT size; filter responses are evaluated at
            ``n_fft // 2 + 1`` points.
        db_ref: When ``None`` the data is first converted with
            ``amplitude_to_db(..., ref=np.max)`` and plots are dB-converted;
            otherwise a Chebyshev-II low-pass is used.
        data_amp: Unused; kept for interface compatibility.
        phase: Unused; kept for interface compatibility.
        plot: If true, each stage is drawn in a new figure.

    Returns:
        The Wiener-filtered amplitude spectrogram.
    """

    def show(title, values):
        # Plot one processing stage in a new figure.
        plt.figure(get_fig_nums() + 1)
        plt.title(title)
        specshow(amplitude_to_db(values) if db_ref is None else values,
                 x_axis='time',
                 y_axis='linear')
        plt.colorbar()

    data = deepcopy(data_db)
    if db_ref is None:
        data = amplitude_to_db(data, ref=np.max)

    # Shift by the magnitude of the minimum, then halve.
    min_data = np.min(data)
    data += abs(min_data)
    data_halved = data / 2

    if plot:
        show('data halved', data_halved)

    eq_freqs = np.array(eq_freqs)
    sample_rates = np.array([sr for _ in eq_freqs])

    fb_sos, _ = librosa.filters._multirate_fb(eq_freqs,
                                              sample_rates,
                                              Q=25.0,
                                              passband_ripple=0.01,
                                              stopband_attenuation=80)
    scaled_audiogram = audiogram[1] / 120.

    fb = []
    if plot:
        plt.figure(get_fig_nums() + 1)
        plt.title('filterbank')
    for sos in fb_sos:
        freqs, fb_filter = scipy.signal.sosfreqz(np.array(sos),
                                                 n_fft // 2 + 1,
                                                 fs=sr)
        fb.append(fb_filter)
        if plot:
            plt.plot(freqs, np.abs(fb_filter))

    # Sum the magnitude responses of all bands into a single curve.
    # ``np.float`` no longer exists in current NumPy; the builtin ``float``
    # selects the same dtype.
    fb = np.array(fb)
    fb_smoothened = np.zeros((fb.shape[1], ), dtype=float)
    for filt in fb:
        fb_smoothened += abs(filt)

    filtered_data = apply_filter(data_halved, abs(fb_smoothened),
                                 scaled_audiogram)
    fb_mag = filtered_data
    fb_mag += data_halved

    if plot:
        show('filtered', abs(fb_mag))

    # Shift so the maximum sits at db_shift.
    db_shift = 65
    max_filtered_dt = np.max(fb_mag)
    fb_mag -= (max_filtered_dt - db_shift)

    if plot:
        show('final output', abs(fb_mag))

    # Lowpass filter
    start_freq = 4500
    order = 32
    ftype = 'butter'
    rp = None
    rs = None
    if db_ref is not None:
        ftype = 'cheby2'
        rp = 0.01
        rs = 80

    _, h_lp = design_filter(order=order,
                            cutoff=[start_freq],
                            fs=sr,
                            freqs=n_fft // 2 + 1,
                            ftype=ftype,
                            rp=rp,
                            rs=rs)
    data_low_pass = apply_filter(fb_mag, abs(h_lp))

    # Flatten everything below 30 to the minimum value.
    dlp_min = np.min(data_low_pass)
    background = data_low_pass < 30
    data_low_pass[background] = dlp_min

    if plot:
        show('low pass', abs(data_low_pass))

    data_out = db_to_amplitude(data_low_pass, ref=1)
    data_out_noisy = scipy.signal.wiener(data_out,
                                         mysize=[n_fft, 3],
                                         noise=0.01)

    return data_out_noisy
def fourier_transformation(audio: np.ndarray) -> np.ndarray:
    """Return the STFT magnitude of ``audio`` in decibels (hop 2**4, FFT size 2**7)."""
    hop_length = 2 ** 4
    n_fft = 2 ** 7
    magnitude = np.abs(stft(audio, hop_length=hop_length, n_fft=n_fft))
    return amplitude_to_db(magnitude)
Ejemplo n.º 23
0
def process_sentence(data, fs, n_fft=512, center=True, plot=False):
    """Equalise a speech signal according to a fixed audiogram.

    The signal is padded, transformed with an STFT, its dB magnitude is
    processed by ``eq``, recombined with the original phase, inverted back
    to the time domain and normalised.

    Args:
        data: Time-domain samples.
        fs: Sample rate.
        n_fft: FFT size. The original author's note: 512 to provide
            25ms-35ms samples
            (https://towardsdatascience.com/how-to-apply-machine-learning-and-deep-learning-methods-to-audio-analysis-615e286fcbbc).
        center: Passed to ``stft`` / ``istft``.
        plot: If true, intermediate signals are written as wav files under
            ``constants.PP_DATA_DIR/audio`` and the output is plotted.

    Returns:
        tuple: ``(denoised_signal, audiogram)``.
    """

    def dump_wav(filename, signal):
        # Write one debug signal.
        # NOTE(review): librosa.output is not available in recent librosa
        # releases -- confirm the pinned version.
        librosa.output.write_wav(os.path.join(constants.PP_DATA_DIR, "audio",
                                              filename),
                                 signal,
                                 fs,
                                 norm=True)

    n = len(data)

    # Pad the data since istft will drop any data in the last frame if samples are
    # less than n_fft. ``size`` is passed by keyword because recent librosa
    # releases reject it positionally.
    data_pad = librosa.util.fix_length(data, size=n + n_fft // 2)

    # Get the frequency distribution
    freq = fft_frequencies(sr=fs, n_fft=n_fft)

    # Get the equation and freq, db array from the audiogram provided
    # NOTE(review): x_audiogram has 9 entries but y_audiogram has 10 --
    # confirm which list is wrong.
    x_audiogram = [125, 250, 500, 1000, 1500, 2000, 2400, 2800, 3000]
    y_audiogram = [10, 15, 0, -10, -30, -35, -40, -50, -60, -70]
    audiogram = process_audiogram(x_audiogram, y_audiogram, freq, plot)

    # Perform the stft, separate magnitude and save phase for later (important)
    data_pad_stft = stft(data_pad, n_fft=n_fft, center=center)
    mag, phase = librosa.core.magphase(data_pad_stft)

    db_ref = np.max

    # Consider using frequencies of phonemes.
    eq_freqs = librosa.filters.mel_frequencies(n_mels=12,
                                               fmin=100.,
                                               fmax=5000.,
                                               htk=True)

    # Convert the magnitude to dB relative to db_ref
    fb_filtered = mag
    if db_ref is not None:
        fb_filtered = amplitude_to_db(mag, ref=db_ref)

    data_proc_mag = eq(fb_filtered, eq_freqs, audiogram, fs, n_fft, db_ref,
                       mag, phase, plot)

    # Multiply new magnitude with saved phase to reconstruct sentence,
    # then perform the inverse stft
    data_proc = data_proc_mag * phase
    data_mod = istft(data_proc, center=center, length=n)

    # Denoising is currently a pass-through.
    denoised_signal = data_mod

    if plot:
        # These reconstructions exist only for the debug dumps, so they are
        # computed only when plotting (Griffin-Lim in particular is costly).
        dump_wav('preprocessed_unfiltered_magnitude.wav',
                 istft(data_proc_mag, center=center, length=n))
        dump_wav('preprocessed_unfiltered_magphase.wav', data_mod)
        dump_wav('preprocessed_unfiltered_griffinlim.wav',
                 librosa.core.griffinlim(data_proc_mag))

        plt.figure(get_fig_nums() + 1)
        plt.title('final output time domain')
        plt.plot(denoised_signal)

    # Normalize
    denoised_signal = librosa.util.normalize(denoised_signal)
    if plot:
        dump_wav('preprocessed_filtered.wav', denoised_signal)

    return denoised_signal, audiogram
Ejemplo n.º 24
0
from librosa.core import stft, amplitude_to_db
from librosa.display import specshow

audio_files = glob("datasets/files/set_a/*.wav")

# Read in one audio file (index 3 of the listing)
audio, sfreq = lr.load(audio_files[3])

# calculate our STFT
HOP_LENGTH = 2**4
SIZE_WINDOW = 2**7
audio_spec = stft(audio, hop_length=HOP_LENGTH, n_fft=SIZE_WINDOW)
magnitude = np.abs(audio_spec)

# convert the magnitude into decibels
spec = amplitude_to_db(magnitude)

# Visualize
specshow(spec, sr=sfreq, x_axis="time", y_axis="hz", hop_length=HOP_LENGTH)

# Calculate spectral features from the magnitude spectrogram; the absolute
# value of dB values is not a magnitude.
bandwidths = lr.feature.spectral_bandwidth(S=magnitude, sr=sfreq)[0]
centroids = lr.feature.spectral_centroid(S=magnitude, sr=sfreq)[0]

# Time of every STFT frame, used as the x coordinate of the overlay
times = np.arange(spec.shape[1]) * HOP_LENGTH / sfreq

# display these features on top of the spectrogram
drawn = specshow(spec,
                 sr=sfreq,
                 x_axis="time",
                 y_axis="hz",
                 hop_length=HOP_LENGTH)
# Depending on the librosa release, specshow returns either the Axes or a
# QuadMesh; resolve to the Axes in both cases.
ax = getattr(drawn, "axes", drawn)
ax.plot(times, centroids)
ax.fill_between(times,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=0.5)
Ejemplo n.º 25
0
#     ran = randrange(1000) 

#  Calculation
for c in classes:   
    file = df[df.label==c].iloc[20,0]
    sample_rate, signal = wavfile.read('clean/'+file)   
    
    Y, freq = sp.calc_fft(signal, sample_rate)  # FFT    
    
    
    
    stft_signal = np.abs(stft(signal, 
                              n_fft=config.n_fft, 
                              hop_length=config.hop_length, 
                              window=config.window))
    stft_signal = amplitude_to_db(stft_signal, ref=np.max)  
    mel = melspectrogram(y=signal, 
                         sr=config.sample_rate, 
                         n_mels=config.n_mels, 
                         n_fft=config.n_fft, 
                         hop_length=config.hop_length, 
                         window=config.window)  
    mel[0] = (2*mel.mean() + mel[0])/3  # Reducing Noise
    mel[1] = (mel.mean() + 2*mel[1])/3  # Reducing Noise
    mel_db = amplitude_to_db(mel, ref=np.max)    
    # mel_db = power_to_db(mel)    
    # mel_pow = medfilt2d(mel_pow)
    # mel_pow = wiener(mel_pow)
    
    #  Store in dictionaries
    c = dict_status[c]