Example #1
0
    def __test(y, top_db, ref, trim_duration):
        """Check the output of `librosa.effects.trim` for one parameter set.

        Verifies the returned sample interval, the level of the retained
        frames, the level of the discarded frames, and the trimmed duration.
        """
        trimmed, interval = librosa.effects.trim(y, top_db=top_db, ref=ref)

        # The interval must select the trimmed signal along the last axis
        selector = [slice(None)] * y.ndim
        selector[-1] = slice(*interval.tolist())
        assert np.allclose(trimmed, y[tuple(selector)])

        # Every frame of the trimmed signal must be above -top_db
        kept_rms = librosa.feature.rmse(y=librosa.to_mono(trimmed), center=False)
        kept_db = librosa.power_to_db(kept_rms**2, ref=ref, top_db=None)
        assert np.all(kept_db > - top_db)

        # Frames before and after the retained interval must be at or
        # below -top_db
        full_rms = librosa.feature.rmse(y=librosa.to_mono(y)).squeeze()
        full_db = librosa.power_to_db(full_rms**2, ref=ref, top_db=None)

        first_frame = int(librosa.samples_to_frames(interval[0]))
        last_frame = int(librosa.samples_to_frames(interval[1]))
        assert np.all(full_db[:first_frame] <= - top_db)
        assert np.all(full_db[last_frame:] <= - top_db)

        # The trimmed duration must match the expected one (atol 0.1)
        seconds = librosa.get_duration(trimmed)
        assert np.allclose(seconds, trim_duration, atol=1e-1), seconds
Example #2
0
def test_mfcc():
    """Yield MFCC checks over a grid of (n_mfcc, dct_type, norm) settings."""

    def __test(dct_type, norm, n_mfcc, S):
        """Check the MFCC shape and, for DCT type 2, the first coefficient."""
        frame_energy = np.sum(S, axis=0)

        coeffs = librosa.feature.mfcc(S=S, dct_type=dct_type, norm=norm, n_mfcc=n_mfcc)

        n_rows, n_frames = coeffs.shape[0], coeffs.shape[1]
        assert n_rows == n_mfcc
        assert n_frames == S.shape[1]

        # With a type-2 DCT the first coefficient divided by the per-frame
        # sum of S must be constant across frames
        if dct_type == 2:
            assert np.var(coeffs[0] / frame_energy) <= 1e-30

    S = librosa.power_to_db(np.random.randn(128, 100)**2, ref=np.max)

    for n_mfcc in (13, 20):
        for dct_type in (1, 2, 3):
            for norm in (None, 'ortho'):
                # The (type 1, 'ortho') combination is expected to raise
                expect_failure = (dct_type == 1) and (norm == 'ortho')
                if expect_failure:
                    tf = pytest.mark.xfail(__test, raises=NotImplementedError)
                else:
                    tf = __test
                yield tf, dct_type, norm, n_mfcc, S
Example #3
0
def test_previews(meta):
    """Regenerate the preview GIF and compare it with the stored `esc50.gif`.

    One recording is sampled (with a fixed seed) from each `target` group of
    `meta`. Each recording is rendered as a log-scaled mel spectrogram and
    saved as a PNG frame in a temporary directory. The frames are assembled
    into `_esc50.gif` by the external `convert` command, and the result is
    compared with `esc50.gif` via `filecmp.cmp`.

    Args:
        meta: table with at least `target`, `filename` and `category`
            columns (used via `groupby` and boolean indexing).
    """
    np.random.seed(20171207)

    recordings = meta.groupby('target')['filename'].apply(lambda cat: cat.sample(1)).reset_index()['filename']

    f, ax = plt.subplots(1, 1, sharey=False, sharex=False, figsize=(8, 2))

    with tempfile.TemporaryDirectory() as tmpdir:
        for index, recording in enumerate(recordings):
            signal = librosa.load('audio/' + recording, sr=44100)[0]
            # The waveform is passed by keyword: librosa >= 0.10 no longer
            # accepts it positionally, and `y=` works on older releases too.
            spec = librosa.feature.melspectrogram(y=signal, sr=44100, n_fft=2205, hop_length=441)
            spec = librosa.power_to_db(spec)

            category = meta[meta.filename == recording].category.values[0]

            # The same axes object is reused for every frame
            ax.imshow(spec, origin='lower', interpolation=None, cmap='viridis', aspect=1.1)
            ax.set_title(f'{category} - {recording}', fontsize=11)
            ax.get_yaxis().set_visible(False)
            ax.get_xaxis().set_visible(False)
            f.tight_layout()
            plt.savefig(f'{tmpdir}/{index:02d}.png', bbox_inches='tight', dpi=72)

        # No shell is involved, so the '*.png' pattern is handed to `convert`
        # unexpanded
        subprocess.call(['convert', '-delay', '100', '-loop', '0', f'{tmpdir}/*.png', '_esc50.gif'])

    assert filecmp.cmp('esc50.gif', '_esc50.gif')
Example #4
0
def static_spectrogram(
        data,
        filename,
        block_nb=0,
        mel_bands=128,
        fmax=22050,
        x_axis='time',
        y_axis='mel',
        display=False):
    """ Compute the static spectrogram of a time series of samples.

    The static spectrogram is computed by taking the power of the signal in
    the frequency domain, decomposing it into mel bands up to a maximum
    frequency, and plotting the result in decibels relative to its maximum.

    Args:
        data (array): 1D array of audio data.
        filename (str): name of the source file; its base name (without
            extension) is used to build the output figure name.
        block_nb (int): block number passed on to the output file name.
        mel_bands (int): number of mel bands for the decomposition
        fmax (int): maximum frequency (in Hertz).
        x_axis (str): x-axis type forwarded to `librosa.display.specshow`.
        y_axis (str): y-axis type forwarded to `librosa.display.specshow`.
        display (boolean): show the figure if True, otherwise save it as a
            PNG under the configured spectrogram path.

    Returns:
        None

    Todo:
        - remove the padding/margin around the plot
        - Add a path and a name where to save the plots

    Note:
        Need to ensure that the computation is accurate

    """
    data_freq_power = np.abs(librosa.stft(data))**2
    # Keep the mel decomposition: previously its result was discarded and the
    # raw STFT power was plotted on a mel axis instead.
    mel_power = librosa.feature.melspectrogram(
            S=data_freq_power,
            power=2.0,
            n_mels=mel_bands,
            fmax=fmax)

    librosa.display.specshow(
            librosa.power_to_db(mel_power, ref=np.max),
            y_axis=y_axis,
            x_axis=x_axis,
            fmax=fmax)

    if display:
        plt.ylabel('Mel')
        plt.xlabel('Time [samples]')
        plt.show()
    else:
        spec_path = utils.read_config('path', 'spectrograms')
        fname = os.path.splitext(os.path.basename(filename))
        fig_path = utils.create_filename(
                spec_path,
                'png',
                fname[0],
                'static',
                block_nb)
        plt.savefig(fig_path)
 def __call__(self, data):
     """Add a log-scaled mel spectrogram, derived from the STFT, to `data`.

     Reads `data['stft']`, `data['sample_rate']` and `data['n_fft']`, writes
     `data['mel_spectrogram']` (dB relative to the maximum) and returns the
     same dict.
     """
     stft = data['stft']
     sample_rate = data['sample_rate']
     n_fft = data['n_fft']
     # Keyword arguments: librosa >= 0.10 made these parameters keyword-only,
     # and the keyword form also works on older releases.
     mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, n_mels=self.n_mels)
     s = np.dot(mel_basis, np.abs(stft)**2.0)
     data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
     return data
Example #6
0
    def __test(x, ref, amin, top_db):
        """Check dtype, shape and dynamic range of `librosa.power_to_db`."""
        db = librosa.power_to_db(x, ref=ref, amin=amin, top_db=top_db)

        # Output is real-valued and has the same shape as the input
        assert np.isrealobj(db)
        eq_(db.shape, x.shape)

        # Without a top_db limit there is no range to verify
        if top_db is None:
            return

        assert db.min() >= db.max()-top_db
Example #7
0
def test_power_to_db_logamp():
    """`power_to_db` and `logamplitude` must agree on the same power input."""

    srand()

    noise_floor = 1e-6

    # Strictly positive random amplitudes
    amplitude = np.abs(np.random.randn(1000)) + noise_floor

    from_power_to_db = librosa.power_to_db(amplitude**2, top_db=None)
    from_logamplitude = librosa.logamplitude(amplitude**2, top_db=None)

    assert np.allclose(from_power_to_db, from_logamplitude)
Example #8
0
File: utils.py Project: hvy/chainer
    def __call__(self, path):
        """Load one audio file and build the model inputs and target.

        The waveform is loaded at `self.sr`, trimmed with `self.top_db`,
        peak-normalized and mu-law quantized. When `self.length` is set, the
        waveform is padded or randomly cropped to that many samples. A
        log-scaled mel spectrogram is computed from the resulting waveform.

        Args:
            path: path of the audio file to load.

        Returns:
            A tuple `(one_hot[:, :-1], spectrogram, quantized[1:])`: the
            one-hot encoded quantized signal without its last step, the
            normalized mel spectrogram, and the quantized signal without its
            first step.
        """
        # load data with trimming and normalizing
        # (sr / top_db / y are passed by keyword: librosa >= 0.10 made them
        # keyword-only, and the keyword form also works on older releases)
        raw, _ = librosa.load(path, sr=self.sr, res_type='kaiser_fast')
        raw, _ = librosa.effects.trim(raw, top_db=self.top_db)
        raw /= numpy.abs(raw).max()
        raw = raw.astype(numpy.float32)

        # mu-law transform
        quantized = self.mu_law.transform(raw)

        # padding/trimming
        if self.length is not None:
            if len(raw) <= self.length:
                # padding: zeros for the waveform, the middle quantization
                # level for the quantized signal
                pad = self.length - len(raw)
                raw = numpy.concatenate(
                    (raw, numpy.zeros(pad, dtype=numpy.float32)))
                quantized = numpy.concatenate(
                    (quantized, self.quantize // 2 * numpy.ones(pad)))
                quantized = quantized.astype(numpy.int32)
            else:
                # trimming: take a random window of self.length samples
                start = random.randint(0, len(raw) - self.length - 1)
                raw = raw[start:start + self.length]
                quantized = quantized[start:start + self.length]

        # calculate mel-spectrogram
        spectrogram = librosa.feature.melspectrogram(
            y=raw, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length,
            n_mels=self.n_mels)
        spectrogram = librosa.power_to_db(
            spectrogram, ref=numpy.max)

        # normalize mel spectrogram into [-1, 1]
        # (presumably relies on power_to_db limiting the range to 80 dB
        # below the maximum -- TODO confirm)
        spectrogram += 40
        spectrogram /= 40
        if self.length is not None:
            spectrogram = spectrogram[:, :self.length // self.hop_length]
        spectrogram = spectrogram.astype(numpy.float32)

        # expand dimensions
        one_hot = numpy.identity(
            self.quantize, dtype=numpy.float32)[quantized]
        one_hot = numpy.expand_dims(one_hot.T, 2)
        spectrogram = numpy.expand_dims(spectrogram, 2)
        quantized = numpy.expand_dims(quantized, 1)

        return one_hot[:, :-1], spectrogram, quantized[1:]
Example #9
0
def wav_to_spec(wav_audio, hparams):
  """Converts wav file contents into the representation named by hparams.

  `hparams.spec_type` selects the transform: 'raw' returns framed samples
  directly, while 'cqt' and 'mel' produce a spectrogram that is additionally
  converted to decibels when `hparams.spec_log_amplitude` is set.

  Raises:
    ValueError: if `hparams.spec_type` is not 'raw', 'cqt' or 'mel'.
  """
  kind = hparams.spec_type

  # Raw framed samples never go through the log-amplitude step.
  if kind == 'raw':
    return _wav_to_framed_samples(wav_audio, hparams)

  if kind == 'cqt':
    spectrogram = _wav_to_cqt(wav_audio, hparams)
  elif kind == 'mel':
    spectrogram = _wav_to_mel(wav_audio, hparams)
  else:
    raise ValueError('Invalid spec_type: {}'.format(hparams.spec_type))

  if hparams.spec_log_amplitude:
    spectrogram = librosa.power_to_db(spectrogram)

  return spectrogram
def melspectrogram(filename):
    """Load an audio file and display its mel spectrogram in decibels.

    The mel spectrogram is computed from the STFT power of the loaded signal
    and plotted in dB relative to its maximum.

    Args:
        filename: path of the audio file to load with `librosa.load`.
    """
    import librosa

    y, sr = librosa.load(filename)

    # The mel spectrogram is built once, from the power spectrogram.
    # (An earlier extra call whose result was thrown away has been removed.)
    D = np.abs(librosa.stft(y)) ** 2
    S = librosa.feature.melspectrogram(S=D)

    # Passing through arguments to the Mel filters
    #S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)

    import matplotlib.pyplot as plt
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                             y_axis='mel', fmax=8000,
                             x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    plt.show()
 def __call__(self, data):
     """Add a log-scaled mel spectrogram, computed from the samples, to `data`.

     Reads `data['samples']` and `data['sample_rate']`, writes
     `data['mel_spectrogram']` (dB relative to the maximum) and returns the
     same dict.
     """
     samples = data['samples']
     sample_rate = data['sample_rate']
     # The waveform is passed as `y=`: librosa >= 0.10 no longer accepts it
     # positionally, and the keyword form also works on older releases.
     s = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=self.n_mels)
     data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
     return data

# Seventh panel: mel spectrogram of the audio loaded from `temp_folder`
fig.add_subplot(4,2,7)
'''
Pxx, freqs, bins, im = plt.specgram(audData, Fs=rate, NFFT=1024, cmap=plt.get_cmap('autumn_r'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
cbar.set_label('Intensity dB')
'''
y, sr = librosa.load(temp_folder)
# D is not used in this section; it is kept in case later code reads it.
D = np.abs(librosa.stft(y))**2
# Only this mel spectrogram is plotted. Two earlier computations (one
# discarded, one immediately overwritten) have been removed.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=sr/2)
S_dB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_dB, x_axis='time',y_axis='mel', sr=sr, fmax=sr/2)
#plt.yticks(np.arange(0, 10000, step=1000))
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')



plt.show()
plt.close()


# Start a fresh figure for the next group of plots
fig = plt.figure(figsize=(20, 20))
plt.subplots_adjust(hspace = 0.2, wspace = 0.2)

Example #13
0
def specgram(audio,
             n_fft=512,
             hop_length=None,
             mask=True,
             log_mag=True,
             re_im=False,
             dphase=True,
             mag_only=False):
  """Spectrogram using librosa.

  Computes a centered STFT of `audio` and returns it either as stacked
  real/imaginary parts (`re_im=True`) or as a magnitude channel plus a phase
  channel.

  Args:
    audio: 1-D array of float32 sound samples.
    n_fft: Size of the FFT. Also used as the window length.
    hop_length: Stride of FFT. Defaults to n_fft/2.
    mask: Multiply the phase channel by the magnitude. Only applied when
      `log_mag` is also true.
    log_mag: Use the logamplitude.
    re_im: Output Real and Imag. instead of logMag and dPhase.
    dphase: Use derivative of phase instead of phase.
    mag_only: Don't return phase.

  Returns:
    specgram: [n_fft/2 + 1, audio.size / hop_length, 2]. The first channel is
      the logamplitude and the second channel is the derivative of phase.
      With `re_im=True` the two channels are the real and imaginary parts.
      See the review note on the `mag_only` branch for its output shape.
  """
  # A falsy hop_length (None or 0) falls back to half the FFT size.
  if not hop_length:
    hop_length = int(n_fft / 2.)

  fft_config = dict(
      n_fft=n_fft, win_length=n_fft, hop_length=hop_length, center=True)

  spec = librosa.stft(audio, **fft_config)

  if re_im:
    # Stack real and imaginary parts along a new trailing axis.
    re = spec.real[:, :, np.newaxis]
    im = spec.imag[:, :, np.newaxis]
    spec_real = np.concatenate((re, im), axis=2)

  else:
    mag, phase = librosa.core.magphase(spec)
    phase_angle = np.angle(phase)

    # Magnitudes, scaled 0-1
    if log_mag:
      # dB relative to the maximum with a 120 dB limit, then divided by 120
      # and shifted by +1.
      mag = (librosa.power_to_db(
          mag**2, amin=1e-13, top_db=120., ref=np.max) / 120.) + 1
    else:
      # NOTE(review): no guard against mag.max() == 0 (all-zero input).
      mag /= mag.max()

    if dphase:
      #  Derivative of phase
      phase_unwrapped = np.unwrap(phase_angle)
      # First difference along axis 1; the first column keeps the unwrapped
      # phase itself so the width is unchanged.
      p = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
      p = np.concatenate([phase_unwrapped[:, 0:1], p], axis=1) / np.pi
    else:
      # Normal phase
      p = phase_angle / np.pi
    # Mask the phase
    if log_mag and mask:
      p = mag * p
    # Return Mag and Phase
    p = p.astype(np.float32)[:, :, np.newaxis]
    mag = mag.astype(np.float32)[:, :, np.newaxis]
    if mag_only:
      # NOTE(review): `mag` already has a trailing axis at this point, so this
      # indexing adds another one and the result is 4-D -- confirm callers
      # expect that shape.
      spec_real = mag[:, :, np.newaxis]
    else:
      spec_real = np.concatenate((mag, p), axis=2)
  return spec_real
Example #14
0
# Accuracy of the classifier on the test set: fraction of predictions equal
# to the labels.
test_preds = classifier.predict(test_pca)
test_acc = np.sum(test_preds == y_te)
test_acc = test_acc / len(y_te)
scale_file = "debussy2ms.wav"
# NOTE(review): the sample rate returned here (`sr`) is not used below; the
# mel spectrogram is computed with `SR` instead -- confirm they match.
scale, sr = lb.load(scale_file)

#S_scale = librosa.stft(scale, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)

mel_spectrogram_TestSong = lb.feature.melspectrogram(scale,
                                                     sr=SR,
                                                     n_fft=N_FFT,
                                                     hop_length=HOP_LENGTH,
                                                     n_mels=N_MELS)
#trained = scaler.transform(mel_spectrogram_TestSong)

# NOTE(review): the mel spectrogram is squared again before the dB
# conversion -- confirm this matches how the training features were built.
melspectrogram_TestSong = lb.power_to_db(mel_spectrogram_TestSong**2)
# Flatten the 2-D spectrogram into a single row (one sample, many features).
melspectrogram_TestSong = melspectrogram_TestSong.reshape(
    1, melspectrogram_TestSong.shape[0] * melspectrogram_TestSong.shape[1])

print("Shape that i pass to predicter", melspectrogram_TestSong.shape)
# NOTE(review): a new scaler is fitted on this single row only; with one
# sample each feature's mean is its own value, so the transformed row is
# presumably all zeros. The scaler fitted on the training data was probably
# intended -- confirm.
scaler1 = StandardScaler()
scaler1.fit(melspectrogram_TestSong)
melspectrogram_TestSong = scaler1.transform(melspectrogram_TestSong)

#pca2 = PCA(n_components = 1)
#pca2.fit(melspectrogram_TestSong)
#mel_spectrogram_TestSong = pca2.transform(melspectrogram_TestSong)

# The PCA step above is commented out, so this prints the scaled shape.
print("after pca", melspectrogram_TestSong.shape)

# Bare expression: displays the shape in a notebook, no effect in a script.
melspectrogram_TestSong.shape
def detectionOnsets(y):
    """Detect note onsets in `y` from frame-to-frame changes of its CQT.

    The CQT magnitude (in dB) is differenced between consecutive frames, the
    per-frame sum of absolute differences is compared with a median-based
    threshold, and onsets closer than `T` seconds are merged by keeping the
    one with the larger deviation. The CQT is then averaged between
    consecutive onsets, optionally normalized and plotted.

    Relies on names defined outside this function: `sr`, `STEP`, `Notemin`,
    `Notemax`, `BINS_PER_OCTAVE`, `H`, `ALPHA`, `BETA`, `T`, `T_att`,
    `norm_spectre`, `plot_onsets` and the `stat` module.

    Args:
        y: audio signal passed to `librosa.cqt`.

    Returns:
        Onset times computed by `librosa.frames_to_time` from the fixed
        onset frames.
    """
    fmin = librosa.note_to_hz(Notemin)
    # fmax is only referenced by the commented-out lines below.
    fmax = librosa.note_to_hz(Notemax)
    #Nmin = int((sr/(fmax*(2**(1/BINS_PER_OCTAVE)-1))))
    #Nmax = int((sr/(fmin*(2**(1/BINS_PER_OCTAVE)-1))))
    # Number of CQT bins spanning the interval from Notemin to Notemax
    n_bins = int(
        (librosa.note_to_midi(Notemax) - librosa.note_to_midi(Notemin)) *
        BINS_PER_OCTAVE / 12)
    # CQT magnitude in dB relative to its maximum
    Chrom = librosa.amplitude_to_db(np.abs(
        librosa.cqt(y=y,
                    sr=sr,
                    hop_length=STEP,
                    fmin=fmin,
                    bins_per_octave=BINS_PER_OCTAVE,
                    n_bins=n_bins)),
                                    ref=np.max)
    Nf = len(Chrom)
    N = len(Chrom[0])
    # Diff[i, j]: absolute change of bin i between frames j-1 and j.
    # Dev[j]: sum of those changes over all bins of frame j.
    Diff = np.zeros((Nf, N))
    Dev = np.zeros(N)
    for j in range(1, N):
        for i in range(Nf):
            Diff[i, j] = np.abs(Chrom[i, j] - Chrom[i, j - 1])
            # Recomputed on every inner iteration; the value left after the
            # last i is the full column sum.
            Dev[j] = sum(Diff[:, j])

    # THRESHOLD FUNCTION
    # Zero-pad Dev with int(H / 2) zeros at the head and at the tail
    l = []
    Seuil = []
    Onsets = []
    for k in range(int(H / 2)):
        l.append(0)
    for val in Dev:
        l.append(val)
    for k in range(int(H / 2)):
        l.append(0)
    # Median computation: threshold = ALPHA + BETA * median of an H-long
    # window of the padded signal starting at i; a frame whose deviation
    # exceeds its threshold is an onset candidate
    for i in range(N):
        Seuil.append(ALPHA + BETA * stat.median(l[i:i + H]))
        if Dev[i] > Seuil[i]:
            Onsets.append(i)

    times = librosa.frames_to_time(np.arange(N), sr=sr, hop_length=STEP)

    # ONSET FILTERING
    # While the next onset is less than T after the current one, delete
    # whichever of the two has the smaller deviation (the current one on ties).
    i = 0
    while i < (len(Onsets) - 1):
        while (i < (len(Onsets) - 1)) and (times[Onsets[i + 1]] <
                                           times[Onsets[i]] + T):
            if Dev[Onsets[i + 1]] < Dev[Onsets[i]]: del Onsets[i + 1]
            else: del Onsets[i]
        i = i + 1

    onset_frames = librosa.util.fix_frames(Onsets,
                                           x_min=0,
                                           x_max=Chrom.shape[1] - 1)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=STEP)

    # Synchronize on the onsets, dropping the start and end of the long frames
    ChromSync = np.zeros((Nf, len(onset_frames) - 1))
    n_att = int(librosa.time_to_frames(T_att, sr=sr, hop_length=STEP))
    for j in range(len(onset_frames) - 1):
        for i in range(Nf):
            # Mean of bin i over the frames between two consecutive onsets,
            # skipping n_att frames on each side.
            # NOTE(review): the slice is empty when the two onsets are at most
            # 2 * n_att frames apart -- confirm how that case should behave.
            ChromSync[i, j] = np.mean(Chrom[i][(onset_frames[j] +
                                                n_att):(onset_frames[j + 1] -
                                                        n_att)])

    # Spectrum normalization


#    ChromSync[:,1] = librosa.power_to_db(librosa.db_to_power(ChromSync[:,1]) / np.sum(librosa.db_to_power(ChromSync[:,1])))
    if norm_spectre:
        # Each column is converted to power, divided by its total power and
        # converted back to dB.
        for j in range(ChromSync.shape[1]):
            ChromSync[:, j] = librosa.power_to_db(
                librosa.db_to_power(ChromSync[:, j]) /
                np.sum(librosa.db_to_power(ChromSync[:, j])))

    # Display
    if plot_onsets:
        plt.figure(figsize=(13, 7))
        # Top panel: CQT spectrogram
        ax1 = plt.subplot(3, 1, 1)
        librosa.display.specshow(Chrom,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=times)
        plt.title('CQT spectrogram')

        # Middle panel: deviation, threshold and retained onsets
        plt.subplot(3, 1, 2, sharex=ax1)
        plt.plot(times, Dev, label='Deviation')
        plt.plot(times, Seuil, color='g', label='Seuil')
        plt.vlines(times[Onsets],
                   0,
                   Dev.max(),
                   color='r',
                   alpha=0.9,
                   linestyle='--',
                   label='Onsets')
        plt.axis('tight')
        plt.legend(frameon=True, framealpha=0.75)

        # Bottom panel: onset-synchronized CQT
        ax1 = plt.subplot(3, 1, 3, sharex=ax1)
        librosa.display.specshow(ChromSync,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=onset_times)
        plt.show()

    return onset_times
Example #16
0
def get_log_spectrum(x):
    """Return the STFT power spectrogram of `x` converted to decibels.

    The STFT uses a 2048-sample FFT and window with a hop of 512 samples.
    """
    stft_matrix = librosa.core.stft(x, n_fft=2048, win_length=2048, hop_length=512)
    power = np.abs(stft_matrix)**2
    return librosa.power_to_db(power)
Example #17
0
def test_melspectrogram_correctness(
    n_fft, sr, hop_length, n_ch, data_format, amin, dynamic_range, n_mels, mel_f_min, mel_f_max
):
    """Test the correctness of melspectrogram.

    The layer built by `get_melspectrogram_layer` is compared against
    `librosa.feature.melspectrogram`, first without and then with decibel
    scaling.

    Note that mel filterbank is tested separated

    """

    def _get_melgram_model(return_decibel, amin, dynamic_range, input_shape=None):
        """Build the melspectrogram layer under test.

        `win_length` is read from the enclosing scope, where it is assigned
        before this helper is first called.
        """
        # compute with kapre
        melgram_model = get_melspectrogram_layer(
            n_fft=n_fft,
            sample_rate=sr,
            n_mels=n_mels,
            mel_f_min=mel_f_min,
            mel_f_max=mel_f_max,
            win_length=win_length,
            hop_length=hop_length,
            input_data_format=data_format,
            output_data_format=data_format,
            return_decibel=return_decibel,
            input_shape=input_shape,
            db_amin=amin,
            db_dynamic_range=dynamic_range,
        )
        return melgram_model

    src_mono, batch_src, input_shape = get_audio(data_format=data_format, n_ch=n_ch)

    win_length = n_fft  # test with x2
    # compute with librosa
    # (the waveform is passed as `y=`: librosa >= 0.10 no longer accepts it
    # positionally, and the keyword form also works on older releases)
    S_ref = librosa.feature.melspectrogram(
        y=src_mono,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        center=False,
        power=1.0,
        n_mels=n_mels,
        fmin=mel_f_min,
        fmax=mel_f_max,
    ).T

    S_ref = np.expand_dims(S_ref, axis=2)  # time, freq, ch=1
    S_ref = np.tile(S_ref, [1, 1, n_ch])  # time, freq, ch=n_ch

    if data_format == 'channels_first':
        S_ref = np.transpose(S_ref, (2, 0, 1))  # ch, time, freq

    # melgram
    melgram_model = _get_melgram_model(
        return_decibel=False, input_shape=input_shape, amin=None, dynamic_range=120.0
    )
    S = melgram_model.predict(batch_src)[0]  # 3d representation
    np.testing.assert_allclose(S_ref, S, atol=1e-4)

    # log melgram
    melgram_model = _get_melgram_model(
        return_decibel=True, input_shape=input_shape, amin=amin, dynamic_range=dynamic_range
    )
    S = melgram_model.predict(batch_src)[0]  # 3d representation
    S_ref_db = librosa.power_to_db(S_ref, ref=1.0, amin=amin, top_db=dynamic_range)

    np.testing.assert_allclose(
        S_ref_db, S, rtol=3e-3
    )  # decibel is evaluated with relative tolerance
    sr=sr,
    n_mels=128,
)
# Linear-frequency spectrogram of `y`, in dB relative to the maximum
fig, ax = plt.subplots(figsize=(30, 10))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
img = librosa.display.specshow(D,
                               y_axis='linear',
                               x_axis='time',
                               sr=sr,
                               ax=ax,
                               cmap='viridis')
# NOTE(review): this creates a new, empty figure rather than changing the dpi
# of the spectrogram figure above -- confirm the intent.
plt.figure(dpi=1200)
plt.show()

# Mel spectrogram `S` (computed earlier), in dB relative to the maximum
fig, ax = plt.subplots()
S_dB = librosa.power_to_db(S, ref=np.max)
img = librosa.display.specshow(S_dB,
                               x_axis='time',
                               y_axis='mel',
                               sr=sr,
                               ax=ax,
                               cmap='viridis')
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('Mel-Frequency Spectrogram', size=20)
# NOTE(review): the axis is drawn with y_axis='mel' but labelled 'log Hz'.
ax.set_ylabel('log Hz', size=15)
ax.set_xlabel('Time', size=15)

plt.show()

# NOTE(review): second consecutive plt.show() with no new figure in between.
plt.show()
Example #19
0
def getAudioSamples(fn,
                    min_dur=50,
                    max_dur=-1,
                    fft=2048,
                    hop_length=512,
                    backtrack=True,
                    superFlux=True,
                    y=None,
                    sr=None,
                    delta=0.07):
    """Split an audio file into samples delimited by detected onsets.

    Args:
        fn: path of the audio file; resolved through `getAudioFile`.
        min_dur: minimum duration for a sample to be kept, in the same
            millisecond units as the computed onset times.
        max_dur: if positive, longer samples are shortened to this duration.
        fft: FFT size for the mel spectrogram (superflux method only).
        hop_length: hop length used for onset detection.
        backtrack: forwarded to `librosa.onset.onset_detect`.
        superFlux: use the superflux onset strength instead of the default.
        y: already-decoded audio data; loaded from `fn` when `y` or `sr` is
            None.
        sr: sample rate of `y`.
        delta: forwarded to `librosa.onset.onset_detect`.

    Returns:
        A tuple `(samples, y, sr)`. `samples` is a list of dicts with keys
        "filename", "start" and "dur"; it is empty when the audio could not
        be loaded or has no duration.
    """
    basename = os.path.basename(fn)
    fn = getAudioFile(fn)
    duration = 0

    # load audio
    if y is None or sr is None:
        try:
            y, sr = loadAudioData(fn)
            duration = int(getDurationFromAudioData(y, sr) * 1000)
        except audioop.error:
            duration = 0
            y = None
            sr = None
    else:
        # Audio supplied by the caller: the duration must still be computed,
        # otherwise it stays 0 and no sample is ever returned.
        duration = int(getDurationFromAudioData(y, sr) * 1000)

    # maxVal = y.max()
    # if maxVal != 0:
    #     y /= maxVal

    if duration <= 0:
        return ([], y, sr)

    # retrieve onsets using superflux method
    # https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py
    # http://dafx13.nuim.ie/papers/09.dafx2013_submission_12.pdf
    if superFlux:
        lag = 2
        n_mels = 138
        fmin = 27.5
        fmax = 16000.0
        max_size = 3
        # The waveform is passed as `y=`: librosa >= 0.10 no longer accepts
        # it positionally, and the keyword form also works on older releases.
        S = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_fft=fft,
                                           hop_length=hop_length,
                                           fmin=fmin,
                                           fmax=fmax,
                                           n_mels=n_mels)
        odf = librosa.onset.onset_strength(S=librosa.power_to_db(S,
                                                                 ref=np.max),
                                           sr=sr,
                                           hop_length=hop_length,
                                           lag=lag,
                                           max_size=max_size)
        onsets = librosa.onset.onset_detect(onset_envelope=odf,
                                            sr=sr,
                                            hop_length=hop_length,
                                            backtrack=backtrack,
                                            delta=delta)

    # retrieve onsets using default method
    else:
        onsets = librosa.onset.onset_detect(y=y,
                                            sr=sr,
                                            hop_length=hop_length,
                                            backtrack=backtrack,
                                            delta=delta)

    # onset frame index -> milliseconds
    times = [
        int(round(1.0 * hop_length * onset / sr * 1000)) for onset in onsets
    ]
    # add the end of the audio
    times.append(duration - 1)

    # Each pair of consecutive times delimits one candidate sample
    samples = []
    for prev, t in zip(times, times[1:]):
        dur = t - prev
        if max_dur > 0 and dur > max_dur:
            dur = max_dur
        if dur >= min_dur:
            samples.append({
                "filename": basename,
                "start": prev,
                "dur": dur
            })

    return (samples, y, sr)
                temp_signal = signal[:3 * Config.sr]  # take 3s chunks
                mask[:int(3 * Config.sr -
                          1)] = False  # go forward mask out the first 3s chunk
                signal = signal[mask]

                #compute mel-spectrogram
                mel_spec = librosa.feature.melspectrogram(
                    temp_signal,
                    sr=Config.sr,
                    n_fft=Config.n_fft,
                    hop_length=Config.hop_length,
                    n_mels=Config.n_mels,
                    fmin=Config.fmin,
                    fmax=Config.fmax)
                # compute log mel spectrogram
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # normalization
                norm_log_mel_spec = librosa.util.normalize(log_mel_spec)

                #plot and save plot
                plt.figure(figsize=Config.figsize, dpi=Config.dpi)

                librosa.display.specshow(norm_log_mel_spec,
                                         fmin=Config.fmin,
                                         fmax=Config.fmax,
                                         sr=Config.sr,
                                         hop_length=Config.hop_length,
                                         cmap=Config.color)

                fig = plt.gcf()
Example #21
0
def feature_extraction(y,
                       fs=44100,
                       statistics=True,
                       include_mfcc0=True,
                       include_delta=True,
                       include_acceleration=True,
                       mfcc_params=None,
                       delta_params=None,
                       acceleration_params=None):
    """Extract MFCC features (optionally with deltas) from an audio signal.

    Args:
        y: audio signal.
        fs: sampling rate used to build the mel filter bank.
        statistics: if True, also return summary statistics of the features.
        include_mfcc0: if False, the first row of the stacked feature matrix
            is dropped.
        include_delta: append first-order delta coefficients.
        include_acceleration: append second-order delta coefficients.
        mfcc_params: dict with keys 'n_fft', 'hop_length', 'n_mels', 'fmin',
            'fmax', 'htk' and 'n_mfcc'. Required.
        delta_params: keyword arguments for `librosa.feature.delta`
            (first order); None means no extra arguments.
        acceleration_params: keyword arguments for `librosa.feature.delta`
            (second order); None means no extra arguments.

    Returns:
        dict with key 'feat' (feature matrix, transposed so that rows are
        frames) and, when `statistics` is True, key 'stat' holding 'mean',
        'std', 'N', 'S1' (sum) and 'S2' (sum of squares) over the rows.

    Raises:
        TypeError: if `mfcc_params` is None.
    """
    if mfcc_params is None:
        # Same exception type as subscripting None, with a clearer message.
        raise TypeError('mfcc_params is required and must be a dict')
    # None defaults cannot be unpacked with **, so treat them as "no options".
    delta_params = {} if delta_params is None else delta_params
    acceleration_params = ({} if acceleration_params is None
                           else acceleration_params)

    eps = numpy.spacing(1)
    # Window function. `scipy.signal.hamming` was removed in SciPy 1.13;
    # the `scipy.signal.windows` namespace works on old and new releases.
    window = scipy.signal.windows.hamming(mfcc_params['n_fft'], sym=False)
    # Compute the static coefficients
    # librosa.stft        -> short-time Fourier transform
    # librosa.filters.mel -> builds the mel filter bank
    power_spectrogram = numpy.abs(
        librosa.stft(
            y + eps,
            n_fft=mfcc_params['n_fft'],
            #win_length=mfcc_params['win_length'],
            hop_length=mfcc_params['hop_length'],
            center=True,
            window=window))**2
    mel_basis = librosa.filters.mel(sr=fs,
                                    n_fft=mfcc_params['n_fft'],
                                    n_mels=mfcc_params['n_mels'],
                                    fmin=mfcc_params['fmin'],
                                    fmax=mfcc_params['fmax'],
                                    htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, power_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrum),
                                n_mfcc=mfcc_params['n_mfcc'])
    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients (first derivative)
        mfcc_delta = librosa.feature.delta(mfcc, **delta_params)
        # Append the delta coefficients to the feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))
    if include_acceleration:
        # Acceleration coefficients (second derivative)
        mfcc_delta2 = librosa.feature.delta(mfcc,
                                            order=2,
                                            **acceleration_params)
        # Append the acceleration coefficients to the feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))
    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]
    feature_matrix = feature_matrix.T
    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix**2, axis=0),
            }
        }
    else:
        return {'feat': feature_matrix}
Example #22
0
                  desc=label):
 f = row['path']
 try:
     data, sr = lr.load(os.path.join(root, label, 'clips', f),
                        sr=SR,
                        mono=True,
                        dtype=np.float32,
                        res_type='kaiser_fast')
     data, _ = lr.effects.trim(
         data)  # trim leading and trailing silence
     mel_specgram = lr.feature.melspectrogram(
         data,
         n_mels=64,
         hop_length=hop_in_samples,
         n_fft=n_fft)
     mfcc = lr.feature.mfcc(S=lr.power_to_db(mel_specgram),
                            sr=SR,
                            n_mfcc=n_channels,
                            n_dim=1)
     # plt.imshow(mfcc.T, cmap='viridis', aspect='auto')
     # plt.savefig('MFCC_test_{}.png'.format(label))
     if METHOD is 'h5':
         writer.create_dataset(str(idx), data=mfcc.reshape(-1))
     elif METHOD is 'tfrecord':
         mfcc_feature = tf.train.Feature(
             float_list=tf.train.FloatList(
                 value=mfcc.reshape(-1).tolist()))
         tf_label = tf.train.Feature(
             bytes_list=tf.train.BytesList(
                 value=[label.encode('utf-8')]))
         age = '' if type(row['age']) == float else row[
Example #23
0
 def power_to_db(self, S, ref=1.0, amin=1e-10, top_db=80.0):
     """Convert a power spectrogram to decibels via `librosa.power_to_db`.

     All arguments are forwarded unchanged.
     """
     options = {'ref': ref, 'amin': amin, 'top_db': top_db}
     return librosa.power_to_db(S, **options)
Example #24
0
        spec_augment_tensorflow.visualization_spectrogram(
            warped_masked_spectrogram, 'after')

        # Flatten the augmented spectrogram row by row into x_train[count],
        # tracking the largest flattened size seen so far.
        shape = warped_masked_spectrogram.shape[
            0] * warped_masked_spectrogram.shape[1]
        if shape > max_shape:
            max_shape = shape
        x_train[count] = [0] * shape
        # `shape` is reused as the write position in the flattened row
        shape = 0
        for i in range(warped_masked_spectrogram.shape[0]):
            for j in range(warped_masked_spectrogram.shape[1]):
                x_train[count][shape] = warped_masked_spectrogram[i][j]
                # Advance the write position. This was `shape - shape + 1`,
                # a no-op expression, so every value overwrote index 0.
                shape = shape + 1
        count = count + 1

        librosa.display.specshow(librosa.power_to_db(melspec, ref=np.max))
        pylab.savefig(save_path, bbox_inches=None, pad_inches=0)
        pylab.close()

x_train = x_train.reshape(1, num_of_files, max_shape)

labels = np.zeros((1, num_of_files, 1))
for i in range(num_of_files):
    labels[0][i][0] = random.randint(0, 9)
    continue

from AudioDataGenerator import AudioDataGenerator

datagen = AudioDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True,
                             shift=.2,
Example #25
0
    def _normalize(audio):
        """Convert a power spectrogram to dB (relative to its maximum) and rescale.

        The dB values are shifted by 80 and divided by 80.
        """
        decibels = librosa.power_to_db(audio, ref=np.max)
        return (decibels + 80) / 80
Example #26
0
# ```
# librosa.load(audio_path, sr=None)
# ```
# to disable resampling.

# # Mel spectrogram
# This first step will show how to compute a [Mel](http://en.wikipedia.org/wiki/Mel_scale) spectrogram from an audio waveform.

# In[4]:


# Let's make and display a mel-scaled power (energy-squared) spectrogram
# (the waveform is passed as `y=`: librosa >= 0.10 no longer accepts it
# positionally, and the keyword form also works on older releases)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

# Convert to log scale (dB). We'll use the peak power (max) as reference.
log_S = librosa.power_to_db(S, ref=np.max)

# Make a new figure
plt.figure(figsize=(12,4))

# Display the spectrogram on a mel scale
# sample rate and hop length parameters are used to render the time axis
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

# Put a descriptive title on the plot
plt.title('mel power spectrogram')

# draw a color bar
plt.colorbar(format='%+02.0f dB')

# Make the figure layout compact
Example #27
0
# Top panel: log-frequency spectrogram of `y` in dB
plt.subplot(211)
plt.title('Spectrogram')
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
librosa.display.specshow(D, x_axis='time', y_axis='log')

# Bottom panel: waveform of `y`
plt.subplot(212)
plt.title('Audioform')
librosa.display.waveplot(y, sr=sr)

# D is rebound to the power spectrogram of X; it is not used again in this
# section and is kept in case later code reads it.
D = np.abs(librosa.stft(X))**2
# Only this mel spectrogram is plotted. Two earlier computations (one
# discarded, one immediately overwritten) have been removed.
S = librosa.feature.melspectrogram(y=X, sr=sample_rate, n_mels=128,fmax=8000)
plt.figure(figsize=(10, 4))
librosa.display.specshow(librosa.power_to_db(S,ref=np.max),y_axis='mel', fmax=8000,x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

# compress to be twice as fast
# (`rate` is passed by keyword: librosa >= 0.10 made it keyword-only)
y_fast = librosa.effects.time_stretch(X, rate=2.0)
time = np.arange(0,len(y_fast))/sample_rate
fig, ax = plt.subplots()
ax.plot(time,y_fast)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()

# stretch to half speed
y_slow = librosa.effects.time_stretch(X, rate=0.5)
# y_slow is derived from X, so its time axis uses X's sample rate
# (`sample_rate`). This previously used `sr`, which belongs to `y`.
time = np.arange(0,len(y_slow))/sample_rate
fig, ax = plt.subplots()
Example #28
0
def save_mel_spectrogram(dataset_path,
                         json_path,
                         num_segments,
                         n_fft=2048,
                         hop_length=512):
    """Extracts MELs from music dataset and saves them into a json file along with chord labels.

      Every ``.wav`` file found directly inside ``dataset_path`` is loaded at
      ``SAMPLE_RATE`` and cut into ``num_segments`` equal-length segments.
      Each segment is turned into a 133-band mel spectrogram in dB, and is
      kept only when it has the expected number of frames. Labels come from
      the third whitespace-separated column of ``NON_GUITAR_LABEL_PATH``;
      segment ``s`` of every file is assigned the ``s``-th label of that list.

      :param dataset_path (str): Path to dataset
      :param json_path (str): Path to json file used to save MELs
      :param: num_segments (int): Number of segments we want to divide sample tracks into
      :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
      :param hop_length (int): Sliding window for FFT. Measured in # of samples
      :return: None. The result is written to ``json_path``.
      """

    # dictionary to store mapping, labels, and MELs
    data = {"mapping": [], "labels": [], "MEL": []}

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mel_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # generating label list (raw string so that \s is a regex class, not an
    # invalid string escape)
    label_list = pd.read_csv(NON_GUITAR_LABEL_PATH,
                             delimiter=r'\s+',
                             index_col=False,
                             header=None)
    label_list = label_list[2].tolist()

    # generating corresponding mapping: each distinct chord gets the next
    # free integer id, in order of first appearance
    voicing = {}
    for chord in label_list:
        voicing.setdefault(chord, len(voicing))

    data["mapping"].append([*voicing])

    # loop through instrument samples
    for instrument in os.listdir(dataset_path):

        # only audio files are handled
        if not instrument.endswith(".wav"):
            continue

        sample_path = os.path.join(dataset_path, instrument)
        signal, sample_rate = librosa.load(sample_path, sr=SAMPLE_RATE)

        # segmenting sample into its constituent chord voicings
        for s in range(num_segments):

            # calculating start and end sample for each chord voicing
            start = s * samples_per_segment
            end = start + samples_per_segment

            # extract log amplitude mel spectrogram; the sample rate returned
            # by the loader and the caller-supplied n_fft are used here
            segment = signal[start:end]
            spectrogram = librosa.feature.melspectrogram(
                y=segment,
                sr=sample_rate,
                n_fft=n_fft,
                hop_length=hop_length,
                n_mels=133,
                window="hann")
            mel_spectrogram = librosa.power_to_db(spectrogram).T

            # store only spectrogram with expected number of vectors & append corresponding label
            if len(mel_spectrogram) == num_mel_vectors_per_segment:
                data["MEL"].append(mel_spectrogram.tolist())
                print("{}, chord:{}".format(sample_path, s + 1))
                data["labels"].append(voicing[label_list[s]])

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
Example #29
0
    for indexA in os.listdir(loadpath):
        for indexB in os.listdir(loadpath + indexA):
            os.makedirs(savepath + indexA + '\\' + indexB)
            for indexC in os.listdir(loadpath + indexA + '\\' + indexB):
                print(indexA, indexB, indexC)
                y, sr = librosa.load(loadpath + indexA + '\\' + indexB + '\\' +
                                     indexC,
                                     sr=16000)

                D = numpy.abs(
                    librosa.stft(y,
                                 n_fft=n_fft,
                                 win_length=win_length,
                                 hop_length=hop_length,
                                 window=signal.hamming,
                                 center=False))**2
                S = librosa.feature.melspectrogram(S=D, n_mels=m_bands)
                gram = librosa.power_to_db(S, ref=numpy.max)
                gram = numpy.transpose(gram, (1, 0))
                # print(numpy.shape(gram))

                file = open(
                    savepath + indexA + '\\' + indexB + '\\' + indexC + '.csv',
                    'w')
                for indexX in range(len(gram)):
                    for indexY in range(len(gram[indexX])):
                        if indexY != 0: file.write(',')
                        file.write(str(gram[indexX][indexY]))
                    file.write('\n')
                file.close()
        i.label = 0

# Split data up into sets that will feed into the network.
# `data` collects spectrogram slices and `labels` the matching label of the
# track each slice came from; the two lists stay index-aligned.
data = []
labels = []

# Getting Input Mel-Spectrograms from music to use in Deep Learning
for i in training:
    ## 256 Mels ## 20s = 862 Frames
    y, sr = librosa.core.load(library_path + "/" + i.name)
    # Keep only the samples between second 60 and second 80 of the track.
    segment = y[60 * sr:80 * sr]
    spectrogram = librosa.feature.melspectrogram(y=segment,
                                                 sr=sr,
                                                 n_fft=2048,
                                                 n_mels=256)
    # NOTE(review): the mel spectrogram is squared before the dB conversion.
    # If melspectrogram already returns power (its documented default), this
    # doubles the dB values -- confirm this is intended.
    log_spectro = librosa.power_to_db(spectrogram**2, ref=1.0)
    i.spectrogram = log_spectro
    # Cut the spectrogram into 41 consecutive, non-overlapping windows of
    # 21 columns each ([x, z) is the current column range).
    x = 0
    z = 21
    for j in range(41):
        temp = log_spectro[0:256, x:z]
        data.append(temp)
        label = i.label
        labels.append(label)
        x = z
        z += 21

for i in testing:
    ## 256 Mels ## 20s = 862 Frames
    y, sr = librosa.core.load(library_path + "/" + i.name)
    segment = y[60 * sr:80 * sr]
Example #31
0
    print('Number of Training Files: ', len(training_files))

    # Loop over files and apply SpecAugment
    for file in training_files:

        # Load the audio file
        audio, sr = librosa.load(file)

        # Extract Mel Spectrogram Features from the audio file
        mel_spectrogram = librosa.feature.melspectrogram(y=audio,
                                                         sr=sr,
                                                         n_mels=256,
                                                         hop_length=128,
                                                         fmax=8000)
        plt.figure(figsize=(14, 6))
        librosa.display.specshow(librosa.power_to_db(mel_spectrogram,
                                                     ref=np.max),
                                 x_axis='time',
                                 y_axis='mel',
                                 fmax=8000)  # Base

        # Apply SpecAugment
        apply = SpecAugment(mel_spectrogram, args.policy)

        time_warped = apply.time_warp(
        )  # Applies Time Warping to the mel spectrogram
        #plt.figure(figsize=(14, 6))
        #librosa.display.specshow(librosa.power_to_db(time_warped[0, :, :, 0].numpy(), ref=np.max), x_axis='time', y_axis='mel', fmax=8000) # Time Warped

        freq_masked = apply.freq_mask(
        )  # Applies Frequency Masking to the mel spectrogram
# Load waveforms
y, _ = librosa.load(filename, mono=True, sr=sr)

# Get spectrogram
D = librosa.stft(y=y,
                 n_fft=n_fft,
                 hop_length=hop_length,
                 win_length=win_length)
mag = np.abs(D)
scaled_mag = mag * 2

# Get mel-spectrogram.
# The filter-bank arguments are passed by keyword: recent librosa releases
# make them keyword-only, and the keyword form also works on older ones.
mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)  # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag ** 1)  # (n_mels, t) # mel spectrogram
scaled_mel = np.dot(mel_basis, scaled_mag ** 1)

# dB versions; below they are only used for their number of mel bands.
db = librosa.power_to_db(mel)
scaled_db = librosa.power_to_db(scaled_mel)

# Apply the DCT basis directly to the (linear) mel spectrograms.
mfccs = np.dot(librosa.filters.dct(n_mfcc, db.shape[0]), mel)
scaled_mfccs = np.dot(librosa.filters.dct(n_mfcc, db.shape[0]), scaled_mel)

mfccs = mfccs.T  # (t, n_mfccs)
scaled_mfccs = scaled_mfccs.T

# Every step above is linear, so doubling the magnitude must double the result.
assert np.all(mfccs * 2 == scaled_mfccs)

print(mfccs)
print(scaled_mfccs)
Example #33
0
            harmonic = librosa.effects.harmonic(y, margin=8)
            chromagram = librosa.feature.chroma_cqt(y=harmonic, sr=sr)
            note, mode = key(chromagram)

            ##Beat_strength & Tempo
            onset_env = librosa.onset.onset_strength(y, sr=sr)
            tempo = librosa.beat.tempo(onset_envelope=onset_env,
                                       aggregate=None)

            ##Power & Loudness
            S = librosa.stft(y, center=False)
            power = np.abs(S)**2
            p_mean = np.sum(power, axis=0, keepdims=True)
            p_ref = np.max(
                power)  # or whatever other reference power you want to use
            loudness = librosa.power_to_db(p_mean, ref=p_ref)

            tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sr))

            #Artist, Title, Album extraction
            filename = filename.split('-')
            artist = filename[0]
            song = filename[1]
            if len(filename) > 2:
                album = filename[2]
            else:
                album = song
            artist = artist.split(',')
            album = album[:-4]
            song, album = song.strip(), album.strip()
            album, song = album.lower(), song.lower()
Example #34
0
def get_melspec(spec, n_mels):
    """Convert a spectrogram into a mel spectrogram in dB relative to its peak.

    Args:
        spec: spectrogram matrix; its squared magnitude is used as the power
            spectrum, so complex STFT output is accepted.
        n_mels: number of mel bands to produce.

    Returns:
        The mel power spectrogram converted to dB, with the maximum of the
        mel spectrogram as the 0 dB reference.
    """
    power_spectrum = np.abs(spec) ** 2
    mel_power = librosa.feature.melspectrogram(S=power_spectrum, n_mels=n_mels)
    # `ref` is given by keyword: it is keyword-only in recent librosa
    # releases, and the keyword form behaves the same on older ones.
    return librosa.power_to_db(mel_power, ref=np.max)
Example #35
0
                                             sr=RATE,
                                             n_mels=128,
                                             fmax=8000)

    plt.subplot(211)
    ax1.set_ylim(yrange)
    plt.plot(full)

    if args.vis == 'mfcc':
        plt.subplot(212)
        librosa.display.specshow(vis, x_axis='time')
        plt.colorbar()

    elif args.vis == 'spec':
        plt.subplot(212)
        librosa.display.specshow(librosa.power_to_db(vis, ref=np.max),
                                 y_axis='mel',
                                 fmax=8000,
                                 x_axis='time')
        plt.colorbar(format='%+2.0f dB')

    plt.pause(0.01)

    previous = data_int

    end = time.time()
    tot.append(end - start)
    print("Time taken =", end - start)

print("finished recording")
print("Total time =", sum(tot))
Example #36
0
    def __test(y_true, x, rp):
        """Check that power_to_db(x) with reference `rp` and no clipping matches y_true."""
        converted = librosa.power_to_db(x, top_db=None, ref=rp)

        assert np.isclose(converted, y_true)
Example #37
0
fmax = 16000.
max_size = 3


########################################################
# The paper uses a log-frequency representation, but for
# simplicity, we'll use a Mel spectrogram instead.
#
# The signal is passed by keyword, which recent librosa
# releases require and older ones also accept.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft,
                                   hop_length=hop_length,
                                   fmin=fmin,
                                   fmax=fmax,
                                   n_mels=n_mels)


plt.figure(figsize=(6, 4))
librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                         y_axis='mel', x_axis='time', sr=sr,
                         hop_length=hop_length, fmin=fmin, fmax=fmax)
plt.tight_layout()


################################################################
# Now we'll compute the onset strength envelope and onset events
# using the librosa defaults.
odf_default = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
onset_default = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length,
                                           units='time')


#########################################
# And similarly with the superflux method
Example #38
0
def load_mel_spectrogram_db(path, config):
    """Load a mel spectrogram and return it in dB together with its mel filters.

    The spectrogram and filters come from ``load_mel_spectrogram(path, config)``.
    As a side effect, the spectrogram's maximum value is stored in
    ``config['ref_power']``. The dB conversion uses that same maximum
    (``ref=np.max``) as its reference.
    """
    mel_spec, filters = load_mel_spectrogram(path, config)
    config['ref_power'] = np.max(mel_spec)
    mel_spec_db = lr.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db, filters
Example #39
0
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display

# Base name (without extension) of the recording in the working directory.
audio = 'arabic6'
y, sr = lr.load('./{}.wav'.format(audio))

# Mel power spectrogram with 128 bands, limited to 8 kHz. This is the only
# spectrogram that gets plotted, so the extra computations whose results
# were discarded or overwritten have been removed.
S = lr.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)

# Plot in dB relative to the spectrogram's peak.
plt.figure(figsize=(10, 4))
lr.display.specshow(lr.power_to_db(S, ref=np.max),
                    y_axis='mel',
                    fmax=8000,
                    x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()
Example #40
0
 def build_mfcc_fingerprint(data, n_mels):
     """Return the mel spectrogram of `data` in dB relative to its peak.

     Args:
         data: audio time series, interpreted at ``SAMPLE_RATE``.
         n_mels: number of mel bands. Previously this argument was ignored
             and 40 bands were always produced; pass 40 to get the old output.

     Returns:
         The mel power spectrogram converted to dB with ``ref=np.max``.
     """
     # `y` is passed by keyword, which recent librosa releases require.
     mel_power = librosa.feature.melspectrogram(y=data, sr=SAMPLE_RATE, n_mels=n_mels)
     return librosa.power_to_db(mel_power, ref=np.max)
Example #41
0
def _borderless_axes():
    """Create a new figure whose single axes fills the canvas with axis decorations off."""
    fig, ax = plt.subplots(1)
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    ax.axis('off')


def graph_audio(f, opt, y=None, sr=None, show=True, shape=None, dest=None, ext=None, verbose=True):
    '''
    This function generates various audio representation graphs for specified .wav files
    (or given audio time series and sampling rate values). It also accepts an optional parameter
    to save the generated graphs to categorized directories based on the category returned by
    get_category for the file's base name.

    Args:
        f (str): the absolute path to the input .wav file
        opt (str): the type of audio graph representation to be generated ("spect" => spectrogram,
                   "mp_spect" => mel-power spectrogram, "cqt" => constant-Q transform, "chrom" => chromagram,
                   "mfcc" => MFCC intensity values)
        y (np.ndarray): supplied audio time series; optional
        sr (int): supplied sampling rate of audio time series y; optional. If either y or sr is
                  missing, both are loaded from f.
        show (bool): specifies whether or not to show the resulting graph (default is True, which always
                     depicts the resulting graph)
        shape (tuple(int, int)): the dimensions (in inches) of the image to display
        dest (str): if a value is given, this will serve as the path of the root directory to write to (default
                    value is None, which does not save the resulting graph)
        ext (int): if supplied (and non-zero), adds "..._<ext>.png" to saved audio file
        verbose (bool): specifies whether or not to add axis labels, ticks, and colorbars to resulting plots
                        (default value is True, which adds the aforementioned details)

    Returns:
        None (function may display a graph and / or save resulting graph file to a specified directory)
    '''
    # Identity checks are required here: `None in [y, sr]` compares a NumPy
    # array against None element-wise and raises on the ambiguous truth value.
    if y is None or sr is None:
        y, sr = librosa.load(f)
    cmap = cm.get_cmap('viridis')

    # Spectrogram
    if opt == 'spect':
        log_spect = np.log(get_spectrogram(y))

        if verbose:
            librosa.display.specshow(log_spect, sr=sr, x_axis='time', y_axis='linear', cmap=cmap)
            plt.colorbar(format='%+2.0f dB')
        else:
            _borderless_axes()
            librosa.display.specshow(log_spect, sr=sr, cmap=cmap)
            plt.axis('off')

    # Mel Power Spectrogram
    elif opt == 'mp_spect':
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        log_S = librosa.power_to_db(S, ref=np.max)

        if verbose:
            librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel', cmap=cmap)
            plt.colorbar(format='%+2.0f dB')
        else:
            _borderless_axes()
            librosa.display.specshow(log_S, sr=sr, cmap=cmap)
            plt.axis('off')

    # Constant-Q Transform
    elif opt == 'cqt':
        C = librosa.cqt(y, sr=sr)
        # NOTE(review): C**2 is fed to amplitude_to_db (not power_to_db) and C
        # is not passed through np.abs first; kept as-is so the rendered
        # images do not change -- confirm this scaling is intended.
        cqt_db = librosa.amplitude_to_db(C**2)

        if verbose:
            librosa.display.specshow(cqt_db,
                                     x_axis='time', y_axis='cqt_note', cmap=cmap)
            plt.colorbar(format='%+2.0f dB')
        else:
            _borderless_axes()
            librosa.display.specshow(cqt_db, cmap=cmap)
            plt.axis('off')

    # Chromagram
    elif opt == 'chrom':
        C = np.abs(librosa.cqt(y, sr=sr))
        chroma = librosa.feature.chroma_cqt(C=C, sr=sr)

        if verbose:
            librosa.display.specshow(chroma, x_axis='time', y_axis='chroma', cmap=cmap)
            plt.colorbar()
        else:
            _borderless_axes()
            librosa.display.specshow(chroma, cmap=cmap)
            plt.axis('off')

    # MFCC Intensity
    elif opt == 'mfcc':
        raw_mfcc = librosa.feature.mfcc(y=y, sr=sr)
        scaled = scale(raw_mfcc, axis=1)

        if verbose:
            librosa.display.specshow(scaled, sr=sr, x_axis='time', cmap=cmap)
            plt.colorbar()
        else:
            _borderless_axes()
            librosa.display.specshow(scaled, sr=sr, cmap=cmap)
            plt.axis('off')

    # Grab the figure before plt.show(), and do it for every plotting mode:
    # previously `fig` was unbound when verbose=True, dest was set and shape
    # was not, so saving crashed with a NameError.
    fig = plt.gcf() if (shape or dest) else None

    if shape:
        fig.set_size_inches(*shape)

    if show:
        plt.show()

    if dest:
        basename = os.path.basename(f)
        ext = '_{0:02d}'.format(ext) if ext else ''
        # NOTE(review): `frameon` may not be accepted by savefig in newer
        # Matplotlib releases -- confirm against the pinned version.
        fig.savefig(dest + get_category(basename) + '/' + basename[:-4] + ext + '.png',
                    dpi=256, frameon=False)
        plt.close()
Example #42
0
def aug_get_spectrogram_feature(filepath):
    """Load an audio file and return an augmented, normalized log-mel feature.

    The file is loaded at 16 kHz and converted to a 128-band mel spectrogram
    (n_fft=512, hop length 128). That spectrogram, with a leading axis of
    size 1 added, is passed through ``spec_augment``; the first entry of the
    result is converted to dB relative to its peak, passed through
    ``_normalize``, and returned as a float tensor with its two axes swapped.

    Args:
        filepath: path of the audio file to load.

    Returns:
        torch.FloatTensor: the transposed, normalized log-mel spectrogram.
    """
    sample_rate = 16000
    hop_length = 128

    # `sr` is passed by keyword, which recent librosa releases require.
    sig, sample_rate = librosa.core.load(filepath, sr=sample_rate)

    mel_spectrogram = librosa.feature.melspectrogram(y=sig,
                                                     n_mels=128,
                                                     sr=sample_rate,
                                                     n_fft=512,
                                                     hop_length=hop_length)

    # spec_augment is given a tensor with a leading axis of size 1.
    n_bands, n_frames = mel_spectrogram.shape
    mel_spectrogram = np.reshape(mel_spectrogram, (-1, n_bands, n_frames))
    mel_spectrogram = torch.from_numpy(mel_spectrogram)
    mel_spectrogram = spec_augment(mel_spectrogram)

    # Drop the leading axis again before converting to dB.
    mel_spectrogram = librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max)
    mel_spectrogram = _normalize(mel_spectrogram)

    mel_spectrogram = torch.FloatTensor(mel_spectrogram).transpose(0, 1)

    return mel_spectrogram
 def calculates_log_mel(data):
     """Return the 128-band mel spectrogram of `data` in dB relative to its peak.

     `data` is the audio time series, interpreted at ``SAMPLE_RATE``.
     """
     # `y` is passed by keyword, which recent librosa releases require.
     S = librosa.feature.melspectrogram(y=data, sr=SAMPLE_RATE, n_mels=128)
     return librosa.power_to_db(S, ref=np.max)
        sample_rate, samples = wavfile.read(trainset[index][2])
        for idx in range(1500):
            print('Creating Silence')
            start_point = np.int((900000) * (np.random.rand(1)))
            end_point = start_point + 16000
            cur_samples = samples[start_point:end_point - 1]
            power_factor = 0.5 + np.random.rand(1)
            cur_samples = cur_samples * power_factor
            S = librosa.feature.melspectrogram(cur_samples.astype(float),
                                               sr=sample_rate,
                                               n_mels=64,
                                               hop_length=250,
                                               n_fft=480,
                                               fmin=20,
                                               fmax=4000)
            log_S = librosa.power_to_db(S, ref=np.max)
            mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=64)
            delta2_mfcc = librosa.feature.delta(mfcc, order=2)
            # choose if 'silence' is going to train or validation
            if np.random.binomial(1, 1400 / 1500):
                # trainSetMEL_DB.append(log_S)
                trainSetMFCC.append(delta2_mfcc)
                trainSetClasses.append(trainset[index][0])
            else:
                # valSetMEL_DB.append(log_S)
                valSetMFCC.append(delta2_mfcc)
                valSetClasses.append(10)

    else:
        if (trainset[index][0] == 11) & (np.random.binomial(1, 0.80)):
            continue
Example #45
0
    def __test(ref):
        """Check that power -> dB -> power with reference `ref` reproduces `xp`."""
        decibels = librosa.power_to_db(xp, top_db=None, ref=ref)
        recovered = librosa.db_to_power(decibels, ref=ref)

        assert np.allclose(xp, recovered)
Example #46
0
def compute_MFCC(y, parameter):
    """Compute MFCCs of `y` from its mel spectrum.

    The mel spectrum comes from ``compute_Mel_Spectrum(y, parameter)``, is
    converted to dB with a reference of 1.0, and is then reduced to
    ``parameter.mfccs`` coefficients by ``librosa.feature.mfcc``.
    """
    mel_db = librosa.power_to_db(compute_Mel_Spectrum(y, parameter), ref=1.0)
    return librosa.feature.mfcc(S=mel_db, n_mfcc=parameter.mfccs)
Example #47
0
# A full list of the supported parameters is provided in the
# `librosa.display.specshow` documentation.

# %%
# Other types of spectral data
# ----------------------------
# The examples above illustrate how to plot linear spectrograms,
# but librosa provides many kinds of spectral representations:
# Mel-scaled, constant-Q, variable-Q, chromagrams, tempograms, etc.
#
# specshow can plot these just as well.  For example, a Mel spectrogram
# can be displayed as follows:

fig, ax = plt.subplots()
# Mel spectrogram of the signal, converted to dB relative to its peak.
M = librosa.feature.melspectrogram(y=y, sr=sr)
M_db = librosa.power_to_db(M, ref=np.max)
# specshow returns the drawn image, which the colorbar below is built from.
img = librosa.display.specshow(M_db, y_axis='mel', x_axis='time', ax=ax)
ax.set(title='Mel spectrogram display')
fig.colorbar(img, ax=ax, format="%+2.f dB")

# %%
# Constant-Q plots, and other logarithmically scaled frequency representations
# such as Variable-Q or `iirt` can be decorated using either the frequencies (Hz)
# or their note names in scientific pitch notation:

# Constant-Q transform magnitude, converted to dB relative to its peak.
C = librosa.cqt(y=y, sr=sr)
C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)

# A fresh figure; `fig` and `ax` are rebound here.
fig, ax = plt.subplots()
librosa.display.specshow(C_db, y_axis='cqt_hz', x_axis='time', ax=ax)
ax.set(title='Frequency (Hz) axis decoration')
Example #48
0
def compute_Normalized_Log_Spectrogram(y, parameter):
    """Return the power spectrogram of `y` in dB, referenced to ``parameter.n_fft``.

    The power spectrogram comes from ``compute_Power_Spectrogram(y, parameter)``.
    """
    power_spec = compute_Power_Spectrogram(y, parameter)
    return librosa.power_to_db(power_spec, ref=parameter.n_fft)