Example #1
def preprocess(file_path, sr=32000, mono=True, n_fft=1024, hop_length=192, n_mels=128, fmax=None, log_spec=False):
    if mono:
        sig, sr = librosa.load(file_path, sr=sr, mono=True)
        sig = sig[np.newaxis]
    else:
        sig, sr = librosa.load(file_path, sr=sr, mono=False)
        # sig, sf_sr = sf.read(file_path)
        # sig = np.transpose(sig, (1, 0))
        # sig = np.asarray([librosa.resample(s, sf_sr, sr) for s in sig])
    # sig = librosa.effects.pitch_shift(sig, sr, n_steps=pitch_shift)

    for y in sig:

        # compute stft
        stft = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=None, window='hann', center=True,
                            pad_mode='reflect')

        # keep only amplitudes
        stft = np.abs(stft)

        # spectrogram weighting
        if log_spec:
            stft = np.log10(stft + 1)
        else:
            freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
            stft = librosa.perceptual_weighting(stft**2, freqs, ref=1.0, amin=1e-10, top_db=99.0)

        # apply mel filterbank
        spectrogram = librosa.feature.melspectrogram(S=stft, sr=sr, n_mels=n_mels, fmax=fmax)

    # print(sig.shape)
    # print(spectrogram.shape)
    return spectrogram  # note: only the last channel's spectrogram survives the loop above
    def __spectrogram_V1(self, signal, fft_window_size, hop_length,
                         log_spectrogram, n_mels, fmax):
        # compute stft
        stft = librosa.stft(signal,
                            n_fft=fft_window_size,
                            hop_length=hop_length,
                            win_length=None,
                            window='hann',
                            center=True,
                            pad_mode='reflect')
        # keep only magnitude
        stft = np.abs(stft)
        # spectrogram weighting
        if log_spectrogram:
            stft = np.log10(stft + 1)
        else:
            freqs = librosa.core.fft_frequencies(sr=self.sample_rate,
                                                 n_fft=fft_window_size)
            stft = librosa.perceptual_weighting(stft**2,
                                                freqs,
                                                ref=1.0,
                                                amin=1e-10,
                                                top_db=99.0)
        # apply mel filterbank
        spectrogram = librosa.feature.melspectrogram(S=stft,
                                                     sr=self.sample_rate,
                                                     n_mels=n_mels,
                                                     fmax=fmax)

        spectrogram = np.asarray(spectrogram)
        return spectrogram
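Both snippets above choose between plain log compression and A-weighted dB scaling of the power spectrogram. A minimal sketch that isolates just that choice, assuming mag is a magnitude STFT (the output of np.abs(librosa.stft(...))):

import numpy as np
import librosa

def weight_spectrogram(mag, sr, n_fft, log_spec=False):
    # either plain log compression of the magnitudes ...
    if log_spec:
        return np.log10(mag + 1)
    # ... or A-weighted dB scaling of the power spectrogram, as in the snippets above
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    return librosa.perceptual_weighting(mag**2, freqs, ref=1.0, amin=1e-10, top_db=99.0)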
Example #3
def processor_d18(file_path):
    n_fft = 2048  # 2048
    sr = 22050  # 22050  # 44100  # 32000
    mono = True  #
    log_spec = False
    n_mels = 256

    hop_length = 512
    fmax = None

    if mono:
        # this is the slowest part: resampling
        sig, sr = librosa.load(file_path, sr=sr, mono=True)
        sig = sig[np.newaxis]
    else:
        sig, sr = librosa.load(file_path, sr=sr, mono=False)
        # sig, sf_sr = sf.read(file_path)
        # sig = np.transpose(sig, (1, 0))
        # sig = np.asarray([librosa.resample(s, sf_sr, sr) for s in sig])

    spectrograms = []
    for y in sig:

        # compute stft (np.asfortranarray keeps the channel buffer Fortran-contiguous, as older librosa requires)
        stft = librosa.stft(np.asfortranarray(y),
                            n_fft=n_fft,
                            hop_length=hop_length,
                            win_length=None,
                            window='hann',
                            center=True,
                            pad_mode='reflect')

        # keep only amplitudes
        stft = np.abs(stft)

        # spectrogram weighting
        if log_spec:
            stft = np.log10(stft + 1)
        else:
            freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
            stft = librosa.perceptual_weighting(stft**2,
                                                freqs,
                                                ref=1.0,
                                                amin=1e-10,
                                                top_db=80.0)

        # apply mel filterbank
        spectrogram = librosa.feature.melspectrogram(S=stft,
                                                     sr=sr,
                                                     n_mels=n_mels,
                                                     fmax=fmax)

        # keep spectrogram
        spectrograms.append(np.asarray(spectrogram))

    spectrograms = np.asarray(spectrograms, dtype=np.float32)

    return spectrograms
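A hypothetical call to processor_d18; 'clip.wav' is a placeholder path, and with mono=True the result stacks one spectrogram per channel:

specs = processor_d18('clip.wav')   # 'clip.wav' is a placeholder path
print(specs.shape)                  # (1, 256, n_frames): channels x mel bands x frames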
Example #4
def pre_process(pathname):
    sampling_rate = 32000
    # duration = 4
    # duration = 5
    # confX['hop_length'] = 520  # 20ms
    hop_length = 192
    # fmin = 20
    # fmax = sampling_rate // 2
    fmax = None
    # confX['n_mels'] = 48
    n_mels = 128
    # confX['n_fft'] = confX['n_mels'] * 20
    n_fft = 1024
    # audio_split = 'dont_crop'
    # samples = sampling_rate * duration
    # dims = (n_mels, 1 + int(np.floor(samples / hop_length)), 1)

    # y, sr = librosa.load(pathname, sr = sampling_rate)
    y, sr = librosa.load(pathname, sr=None)
    # y, (trim_begin, trim_end) = librosa.effects.trim(y)

    # Amplitudes of STFT
    stft = np.abs(
        librosa.stft(y,
                     n_fft=n_fft,
                     hop_length=hop_length,
                     window='hann',
                     center=True,
                     pad_mode='reflect'))

    print('stft shape:', stft.shape)

    freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)  # use the sample rate actually returned by load
    stft = librosa.perceptual_weighting(stft**2,
                                        freqs,
                                        ref=1.0,
                                        amin=1e-10,
                                        top_db=99.0)

    print('stft shape:', stft.shape)

    # Apply mel filterbank
    # Power param is set to 2 (power) by default
    mel_spect = librosa.feature.melspectrogram(S=stft,
                                               sr=sr,
                                               n_mels=n_mels,
                                               fmax=fmax)

    print('mel shape:', mel_spect.shape)

    log_mel_spect = librosa.core.power_to_db(mel_spect)

    print('log mel shape:', log_mel_spect.shape)

    # spectrogram = librosa.feature.melspectrogram(S = stft)
    # Keep spectrogram
    # return np.asarray(spectrogram)
    return np.asarray(log_mel_spect)
Example #5
    def process(self, file_path, **kwargs):
        n_fft = 1024
        sr = 32000
        mono = True
        log_spec = False
        n_mels = 128

        hop_length = 192
        fmax = None

        if mono:
            sig, sr = librosa.load(file_path, sr=sr, mono=True)
            sig = sig[np.newaxis]
        else:
            sig, sr = librosa.load(file_path, sr=sr, mono=False)
            # sig, sf_sr = sf.read(file_path)
            # sig = np.transpose(sig, (1, 0))
            # sig = np.asarray([librosa.resample(s, sf_sr, sr) for s in sig])

        spectrograms = []
        for y in sig:

            # compute stft
            stft = librosa.stft(y,
                                n_fft=n_fft,
                                hop_length=hop_length,
                                win_length=None,
                                window='hann',
                                center=True,
                                pad_mode='reflect')

            # keep only amplitudes
            stft = np.abs(stft)

            # spectrogram weighting
            if log_spec:
                stft = np.log10(stft + 1)
            else:
                freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
                stft = librosa.perceptual_weighting(stft**2,
                                                    freqs,
                                                    ref=1.0,
                                                    amin=1e-10,
                                                    top_db=99.0)

            # apply mel filterbank
            spectrogram = librosa.feature.melspectrogram(S=stft,
                                                         sr=sr,
                                                         n_mels=n_mels,
                                                         fmax=fmax)

            # keep spectrogram
            spectrograms.append(np.asarray(spectrogram))

        spectrograms = np.asarray(spectrograms)

        return spectrograms
Example #6
def mel_weight(S, power):
    global _mel_freqs
    if _mel_freqs is None:
        _mel_freqs = librosa.mel_frequencies(S.shape[0], fmin=hparams.fmin)
    S = librosa.perceptual_weighting(np.abs(S)**power,
                                     _mel_freqs,
                                     ref=hparams.ref_level_db)
    S = _normalize(S - hparams.ref_level_db)
    return S
    def compute_loudness(self, n_fft=256):

        fourier = librosa.stft(self.audio_raw, n_fft=n_fft)
        S = np.abs(fourier * np.conj(fourier))
        log_S = librosa.perceptual_weighting(
            S**2, librosa.fft_frequencies(n_fft=n_fft))
        self.loudness = log_S.sum(axis=0, keepdims=True)[0]
        self.n_points = int(n_fft / 4)
        self.n_windows = int(np.ceil(len(self.audio_raw) / self.n_points))
        print('processing windows: {}'.format(self.n_windows))
        print('points per windows: {}'.format(self.n_points))
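The method above sums an A-weighted log-power spectrogram over frequency to get a per-frame loudness curve. A minimal standalone sketch of the same idea, assuming a mono signal y at sample rate sr (here the sample rate is passed to fft_frequencies explicitly and the power spectrum is weighted once):

import numpy as np
import librosa

n_fft = 256
stft = librosa.stft(y, n_fft=n_fft)
power = np.abs(stft)**2                                   # power spectrogram |X|^2
freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
weighted_db = librosa.perceptual_weighting(power, freqs)  # A-weighted dB
loudness = weighted_db.sum(axis=0)                        # one value per STFT frame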
	def extractor(self, wav, sr, feature_type, full_file_path):
		"""
		TODO
		- the extractor takes a file
		- `librosa` expects wav data as `float`, not `int` - this needs extra handling
		Defines the feature extractors (and post-augmenters) for files.
		Adding a new extractor amounts to adding a new `elif` branch.
		:param wav: `numpy.ndarray` object, the loaded wav file
		:param sr: sample rate of the wav file
		:param feature_type: `str` object, the name of the extractor
		:param full_file_path: `str` object, the full path to the wav file
		"""
		# raw data
		if feature_type == 'raw':
			features = wav

		# Fourier coefficients
		elif feature_type == 'fft':
			_, _, spectr = signal.spectrogram(wav, nperseg=200, nfft=200, fs=8000, noverlap=128)
			features = spectr

		elif feature_type == 'mel':
			features = librosa.feature.melspectrogram(wav, n_mels=128, sr=sr, n_fft=2048, hop_length=1024)
		
		elif feature_type == 'mfcc':
			features = librosa.feature.mfcc(wav, n_mfcc=40, sr=sr)
		
		elif feature_type == 'percep_spec':
			n_fft = 512
			stft = librosa.stft(wav, n_fft=n_fft, hop_length=n_fft // 4, win_length=None, window='hann',
								center=True, pad_mode='reflect')
			stft = np.abs(stft)
			freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
			stft = librosa.perceptual_weighting(stft ** 2, freqs, ref=1.0, amin=1e-10, top_db=99.0)

			features = librosa.feature.melspectrogram(S=stft, sr=sr, n_mels=128, fmax=sr//2)
		
		# embeddings from the VGGish model
		# more details:
		elif feature_type == 'vggish':
			from models.vggish import vggish_gen_embeddings, VGGISH_CHECKPOINT_PATH, \
				VGGISH_PCA_PARAMS_PATH
			embeddings = vggish_gen_embeddings.get_embeddings(VGGISH_CHECKPOINT_PATH,
																VGGISH_PCA_PARAMS_PATH,
																full_file_path)
			features = embeddings
		
		else:
			pass  # for other feature types
		
		return features
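Per the docstring, adding a feature type is just another elif branch. A hypothetical call of the 'percep_spec' branch; fe stands in for an instance of the class that defines extractor, and 'speech.wav' is a placeholder file:

import librosa

wav, sr = librosa.load('speech.wav', sr=None)
features = fe.extractor(wav, sr, feature_type='percep_spec', full_file_path='speech.wav')
print(features.shape)   # (128, n_frames): mel bands x frames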
Example #9
def spectrogram(y, power, pcen=False):
    global _mel_freqs
    stftS = librosa.stft(y,
                         n_fft=hparams.fft_size,
                         hop_length=hparams.hop_size)
    if hparams.use_preemphasis:
        y = preemphasis(y)
    S = librosa.stft(y, n_fft=hparams.fft_size, hop_length=hparams.hop_size)
    if _mel_freqs is None:
        _mel_freqs = librosa.mel_frequencies(S.shape[0], fmin=hparams.fmin)
    _S = librosa.perceptual_weighting(np.abs(S)**power,
                                      _mel_freqs,
                                      ref=hparams.ref_level_db)
    return _normalize(_S - hparams.ref_level_db), stftS
Example #10
def cqtgram(y, sr, hop_length=512, octave_bins=24, n_octaves=8, fmin=40, perceptual_weighting=False):
    s_complex = librosa.cqt(
        y,
        sr=sr,
        hop_length=hop_length,
        bins_per_octave=octave_bins,
        n_bins=octave_bins * n_octaves,
        fmin=fmin,
    )
    specgram = np.abs(s_complex)
    if perceptual_weighting:
        freqs = librosa.cqt_frequencies(specgram.shape[0], fmin=fmin, bins_per_octave=octave_bins)
        specgram = librosa.perceptual_weighting(specgram**2, freqs, ref=np.max)
    else:
        specgram = librosa.amplitude_to_db(specgram, ref=np.max)
    return specgram
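A quick comparison of the two branches above, assuming one of librosa's bundled example clips is available (librosa.example needs librosa >= 0.8):

import librosa

y, sr = librosa.load(librosa.example('trumpet'))
flat_db = cqtgram(y, sr)                                   # amplitude_to_db branch
weighted_db = cqtgram(y, sr, perceptual_weighting=True)    # A-weighted branch
print(flat_db.shape == weighted_db.shape)                  # True: only the dB scaling differs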
Example #11
    def predictOne(self, samples: Signal):
        """Calculates the cqt of the given audio using librosa.

        Args:
            samples (Signal): The samples of the audio.

        Returns:
            tuple of List[float]: The cqt of the audio.

        """
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value
        n_bins = self.parameters["binNumber"].value
        cqt_sr = sr / hop_length
        cqt = librosa.cqt(samples.values,
                          sr=sr,
                          hop_length=hop_length,
                          n_bins=n_bins)
        linear_cqt = np.abs(cqt)

        if self.parameters["scale"].value == "Amplitude":
            result = linear_cqt
        elif self.parameters["scale"].value == "Power":
            result = linear_cqt**2
        elif self.parameters["scale"].value == "MSAF":
            result = librosa.amplitude_to_db(linear_cqt**2, ref=np.max)
            result += np.min(
                result
            ) * -1  # Inverting the db scale (don't know if this is correct)
        elif self.parameters["scale"].value == "Power dB":
            result = librosa.amplitude_to_db(
                linear_cqt,
                ref=np.max)  # Based on Librosa, standard power spectrum in dB
            result += np.min(result) * -1
        elif self.parameters["scale"].value == "Perceived dB":
            freqs = librosa.cqt_frequencies(linear_cqt.shape[0],
                                            fmin=librosa.note_to_hz('C1'))
            result = librosa.perceptual_weighting(linear_cqt**2,
                                                  freqs,
                                                  ref=np.max)
            result += np.min(result) * -1
        else:
            raise ValueError("parameterScale is not a correct value")

        return (Signal(result.T, sampleRate=cqt_sr), )
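The "Perceived dB" branch again, stripped of the class machinery, as a sketch; y, sr, hop_length and n_bins are assumed inputs:

import numpy as np
import librosa

linear_cqt = np.abs(librosa.cqt(y, sr=sr, hop_length=hop_length, n_bins=n_bins))
freqs = librosa.cqt_frequencies(linear_cqt.shape[0], fmin=librosa.note_to_hz('C1'))
perceived_db = librosa.perceptual_weighting(linear_cqt**2, freqs, ref=np.max)
perceived_db -= perceived_db.min()   # shift so the quietest bin sits at 0 dB, as above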
def _filename_to_spec(file_path,
                      n_fft=1024,
                      sr=44100,
                      mono=True,
                      log_spec=False,
                      n_mels=64,
                      hop_length=512,
                      fmax=None):
    samples, sr = librosa.load(file_path, sr=sr, mono=mono)

    # Compute stft
    stft = librosa.stft(samples,
                        n_fft=n_fft,
                        hop_length=hop_length,
                        win_length=None,
                        window='hann',
                        center=True,
                        pad_mode='reflect')

    # Keep only the magnitudes and ignore the phases.
    stft = np.abs(stft)

    # Select our spectrogram weighting.
    if log_spec:
        stft = np.log10(stft + 1)
    else:
        freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
        stft = librosa.perceptual_weighting(stft**2,
                                            freqs,
                                            ref=1.0,
                                            amin=1e-10,
                                            top_db=99.0)

    # Apply mel filterbank.
    spectrogram = librosa.feature.melspectrogram(S=stft,
                                                 sr=sr,
                                                 n_mels=n_mels,
                                                 fmax=fmax).T

    # Visualize
    #plt.pcolormesh(spectrogram.T)
    #plt.show()

    assert spectrogram.shape[1] == n_mels
    return spectrogram
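A hypothetical call to _filename_to_spec; note that unlike the earlier helpers it transposes the result, so time is the first axis:

spec = _filename_to_spec('clip.wav', n_mels=64)   # 'clip.wav' is a placeholder path
print(spec.shape)                                 # (n_frames, 64) because of the trailing .T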
Example #13
 def cqtgram(self, y, hop_length=512, octave_bins=24, n_octaves=8, fmin=40,
             perceptual_weighting=False):
     S_complex = librosa.cqt(
         y,
         sr=self.sr,
         hop_length=hop_length,
         bins_per_octave=octave_bins,
         n_bins=octave_bins*n_octaves,
         fmin=fmin
     )
     S = np.abs(S_complex)
     if perceptual_weighting:
         freqs = librosa.cqt_frequencies(
             S.shape[0],
             fmin=fmin,
             bins_per_octave=octave_bins
         )
         S = librosa.perceptual_weighting(S**2, freqs, ref=np.max)
     else:
         S = librosa.power_to_db(S**2, ref=np.max)
     return S
Example #14
 def cqtgram(self,
             y,
             hop_length=512,
             octave_bins=24,
             n_octaves=8,
             fmin=40,
             perceptual_weighting=False):
     S_complex = librosa.cqt(y,
                             sr=self.sr,
                             hop_length=hop_length,
                             bins_per_octave=octave_bins,
                             n_bins=octave_bins * n_octaves,
                             fmin=fmin)
     S = np.abs(S_complex)
     if perceptual_weighting:
         freqs = librosa.cqt_frequencies(S.shape[0],
                                         fmin=fmin,
                                         bins_per_octave=octave_bins)
         S = librosa.perceptual_weighting(S**2, freqs, ref=np.max)
     else:
         S = librosa.power_to_db(S**2, ref=np.max)
     return S
Example #15
#np.angle(D[f, t]) is the phase of frequency bin f at frame t
#Note: each STFT cell factors as magnitude * phase (see the sketch after this snippet)
Magnitude_l = np.abs(ft_left)
Magnitude_r = np.abs(ft_right)
#Phase = np.angle(ft_left)

Power_l = Magnitude_l**2
Power_r = Magnitude_r**2
print(Power_l.shape)

#Remove the loop?
fft_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
print(fft_frequencies.shape)

#Perceptual weighting of a power spectrogram
pw_l = librosa.perceptual_weighting(S=Magnitude_l**2,
                                    frequencies=fft_frequencies)
pw_r = librosa.perceptual_weighting(S=Magnitude_r**2,
                                    frequencies=fft_frequencies)
#more options, as with power_to_db: ref=1.0, amin=1e-10, top_db=80.0
print(pw_l.shape)

ms_l = librosa.feature.melspectrogram(S=pw_l, n_mels=256)
ms_r = librosa.feature.melspectrogram(S=pw_r, n_mels=256)
#by default n_mels=128
print(ms_l.shape)

transform = np.empty((2, 256, 431))
transform[0] = ms_l
transform[1] = ms_r

path_save = data_path + "\\" + save1_path + "\\"
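A tiny sanity check of the magnitude/phase remark at the top of this snippet, using a synthetic 440 Hz tone:

import numpy as np
import librosa

sr_demo = 22050
t = np.linspace(0, 1.0, sr_demo, endpoint=False)
tone = np.sin(2 * np.pi * 440 * t).astype(np.float32)
D = librosa.stft(tone, n_fft=1024)
print(np.allclose(D, np.abs(D) * np.exp(1j * np.angle(D))))   # True: D[f, t] = magnitude * phase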
def perceptual_cqt(y, sr):
    C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('A1')))
    freqs = librosa.cqt_frequencies(C.shape[0], fmin=librosa.note_to_hz('A1'))  # A1 fmin suits musical material
    perceptual_CQT = librosa.perceptual_weighting(C**2, freqs, ref=np.max)
    return perceptual_CQT
Example #17
def weighted_spectro(audio, sr):
    C = np.abs(librosa.cqt(audio, sr=sr, fmin=cqt_fmin))
    freqs = librosa.cqt_frequencies(C.shape[0], fmin=cqt_fmin)
    perceptual_sdb = librosa.perceptual_weighting(C**2, freqs, ref=np.max)
    return perceptual_sdb, librosa.db_to_power(perceptual_sdb)
Example #18
print(x.shape, sr)  #x.shape = (276480,) sr = 22050

plt.figure(figsize=(14, 5))
librosa.display.waveplot(x, sr=sr)
plt.show()

X = librosa.stft(x)
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(14, 5))
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()
plt.show()

#Perceptual Weighting:
freq = librosa.core.fft_frequencies(sr=sr)
mag = librosa.perceptual_weighting(abs(X)**2, freq)
librosa.display.specshow(mag, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()
plt.show()

r = librosa.autocorrelate(x, max_size=6000)
sample = r[:300]
plt.figure(figsize=(14, 5))
plt.plot(sample)
plt.show()

#Chroma Features
sound_len = 400
chrom = librosa.feature.chroma_stft(x, sr=sr, hop_length=sound_len)
plt.figure(figsize=(14, 5))
librosa.display.specshow(chrom,