Python power_to_db Examples, librosa.power_to_db Python Examples

Example #1

0

Show file

File: test_effects.py Project: lostanlen/librosa

    def __test(y, top_db, ref, trim_duration):
        """Check the output of ``librosa.effects.trim`` for one parameter set.

        Verifies that the returned interval indexes the trimmed signal, that
        frame levels inside/outside the interval sit on the expected side of
        ``-top_db``, and that the trimmed duration matches ``trim_duration``.
        """
        trimmed, interval = librosa.effects.trim(y, top_db=top_db, ref=ref)

        # The interval must select exactly the trimmed samples on the last axis
        selector = [slice(None)] * y.ndim
        selector[-1] = slice(*interval.tolist())
        assert np.allclose(trimmed, y[tuple(selector)])

        # Frames of the trimmed signal are all above the threshold
        rms_kept = librosa.feature.rmse(y=librosa.to_mono(trimmed), center=False)
        db_kept = librosa.power_to_db(rms_kept**2, ref=ref, top_db=None)
        assert np.all(db_kept > - top_db)

        # Frames of the full signal outside the interval are at or below it
        rms_full = librosa.feature.rmse(y=librosa.to_mono(y)).squeeze()
        db_full = librosa.power_to_db(rms_full**2, ref=ref, top_db=None)

        first = int(librosa.samples_to_frames(interval[0]))
        last = int(librosa.samples_to_frames(interval[1]))
        for outside in (db_full[:first], db_full[last:]):
            assert np.all(outside <= - top_db)

        # Trimmed duration must match the expectation to within 0.1
        duration = librosa.get_duration(trimmed)
        assert np.allclose(duration, trim_duration, atol=1e-1), duration

Example #2

0

Show file

File: test_features.py Project: ai-learn-use/librosa

def test_mfcc():
    """Yield MFCC checks for every combination of n_mfcc, DCT type and norm."""

    def __test(dct_type, norm, n_mfcc, S):
        """Check output shape and, for DCT type 2, the first coefficient."""
        frame_energy = np.sum(S, axis=0)

        coeffs = librosa.feature.mfcc(S=S, dct_type=dct_type, norm=norm,
                                      n_mfcc=n_mfcc)

        assert coeffs.shape[0] == n_mfcc
        assert coeffs.shape[1] == S.shape[1]

        # In type-2 mode, DC component should be constant over all frames
        if dct_type == 2:
            assert np.var(coeffs[0] / frame_energy) <= 1e-30

    S = librosa.power_to_db(np.random.randn(128, 100)**2, ref=np.max)

    for n_mfcc in (13, 20):
        for dct_type in (1, 2, 3):
            for norm in (None, 'ortho'):
                # Type-1 DCT with ortho normalisation is expected to raise
                expect_failure = (dct_type == 1 and norm == 'ortho')
                check = (pytest.mark.xfail(__test, raises=NotImplementedError)
                         if expect_failure else __test)
                yield check, dct_type, norm, n_mfcc, S

Example #3

0

Show file

File: test_dataset.py Project: ankitshah009/ESC-50

def test_previews(meta):
    """Rebuild the animated spectrogram preview and compare it to ``esc50.gif``.

    After seeding NumPy's global RNG, one recording is sampled per ``target``
    group. Each recording's mel spectrogram (in dB) is rendered to a PNG frame
    in a temporary directory, and the frames are assembled into ``_esc50.gif``
    by the external ``convert`` command.

    Args:
        meta: DataFrame providing the ``target``, ``filename`` and ``category``
            columns read below.
    """
    np.random.seed(20171207)

    recordings = meta.groupby('target')['filename'].apply(lambda cat: cat.sample(1)).reset_index()['filename']

    f, ax = plt.subplots(1, 1, sharey=False, sharex=False, figsize=(8, 2))

    with tempfile.TemporaryDirectory() as tmpdir:
        for index, recording in enumerate(recordings):
            signal = librosa.load('audio/' + recording, sr=44100)[0]
            # Pass the audio by keyword: librosa >= 0.10 no longer accepts it
            # positionally, while `y=` works on every version.
            spec = librosa.feature.melspectrogram(y=signal, sr=44100, n_fft=2205, hop_length=441)
            spec = librosa.power_to_db(spec)

            category = meta[meta.filename == recording].category.values[0]

            ax.imshow(spec, origin='lower', interpolation=None, cmap='viridis', aspect=1.1)
            ax.set_title(f'{category} - {recording}', fontsize=11)
            ax.get_yaxis().set_visible(False)
            ax.get_xaxis().set_visible(False)
            f.tight_layout()
            plt.savefig(f'{tmpdir}/{index:02d}.png', bbox_inches='tight', dpi=72)

        # The glob is handed to `convert` unexpanded (no shell is involved).
        subprocess.call(['convert', '-delay', '100', '-loop', '0', f'{tmpdir}/*.png', '_esc50.gif'])

    # Release the figure so it does not linger for the rest of the test session.
    plt.close(f)

    assert filecmp.cmp('esc50.gif', '_esc50.gif')

Example #4

0

Show file

File: audio.py Project: wantongtang/asc-cnn

def static_spectrogram(
        data,
        filename,
        block_nb=0,
        mel_bands=128,
        fmax=22050,
        x_axis='time',
        y_axis='mel',
        display=False):
    """ Compute the static spectrogram of a time series of samples.

    The static spectrogram is computed by taking the power of the signal in
    the frequency domain, decomposed into mel bands up to a maximum
    frequency, and converting it to decibels relative to its peak.

    Args:
        data (array): 1D array of audio data.
        filename (str): path of the source audio file; its base name (without
            extension) is used to build the name of the saved figure.
        block_nb (int): block number forwarded to ``utils.create_filename``.
        mel_bands (int): number of mel bands for the decomposition
        fmax (int): maximum frequency (in Hertz).
        x_axis (str): x-axis type forwarded to ``librosa.display.specshow``.
        y_axis (str): y-axis type forwarded to ``librosa.display.specshow``.
        display (boolean): plotting or saving the output figure.

    Returns:
        None

    Todo:
        - remove the padding/margin around the plot
        - Add a path and a name where to save the plots

    Note:
        Need to ensure that the computation is accurate

    """
    data_freq_power = np.abs(librosa.stft(data))**2
    # Keep the mel decomposition: the original discarded this result and
    # plotted the linear-frequency STFT power on a mel-labelled axis.
    mel_power = librosa.feature.melspectrogram(
            S=data_freq_power,
            power=2.0,
            n_mels=mel_bands,
            fmax=fmax)

    librosa.display.specshow(
            librosa.power_to_db(mel_power, ref=np.max),
            y_axis=y_axis,
            x_axis=x_axis,
            fmax=fmax)

    if display:
        plt.ylabel('Mel')
        plt.xlabel('Time [samples]')
        plt.show()
    else:
        # Build the output path from the configured spectrogram directory
        spec_path = utils.read_config('path', 'spectrograms')
        fname = os.path.splitext(os.path.basename(filename))
        fig_path = utils.create_filename(
                spec_path,
                'png',
                fname[0],
                'static',
                block_nb)
        plt.savefig(fig_path)

Example #5

0

Show file

File: audio_transforms.py Project: rhyolight/nupic.research

 def __call__(self, data):
     """Add a dB-scaled mel spectrogram computed from ``data['stft']``.

     Reads ``data['stft']``, ``data['sample_rate']`` and ``data['n_fft']``,
     stores the result under ``data['mel_spectrogram']`` and returns the same
     ``data`` mapping.
     """
     stft = data['stft']
     sample_rate = data['sample_rate']
     n_fft = data['n_fft']
     # Keyword arguments: librosa >= 0.10 made these parameters keyword-only.
     mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=n_fft,
                                     n_mels=self.n_mels)
     # Project the power spectrum onto the mel filterbank
     s = np.dot(mel_basis, np.abs(stft)**2.0)
     data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
     return data

Example #6

0

Show file

File: test_core.py Project: Monal415/librosa

    def __test(x, ref, amin, top_db):
        """Check dtype, shape and dynamic range of ``librosa.power_to_db``."""
        y = librosa.power_to_db(x, ref=ref, amin=amin, top_db=top_db)

        assert np.isrealobj(y)
        eq_(y.shape, x.shape)

        # Without a top_db limit there is no range constraint to verify
        if top_db is None:
            return

        assert y.min() >= y.max()-top_db

Example #7

0

Show file

File: test_core.py Project: dpwe/librosa

def test_power_to_db_logamp():
    """``power_to_db`` and ``logamplitude`` must agree on the same power input."""

    srand()

    noise_floor = 1e-6

    # Strictly positive noise, then its power
    x = np.abs(np.random.randn(1000)) + noise_floor
    power = x**2

    from_power_to_db = librosa.power_to_db(power, top_db=None)
    from_logamplitude = librosa.logamplitude(power, top_db=None)

    assert np.allclose(from_power_to_db, from_logamplitude)

Example #8

0

Show file

File: utils.py Project: hvy/chainer

    def __call__(self, path):
        """Load one audio file and build the network inputs and target.

        Args:
            path: audio file path passed to ``librosa.load``.

        Returns:
            A tuple ``(one_hot[:, :-1], spectrogram, quantized[1:])``: the
            one-hot encoded quantized waveform without its last step, the
            rescaled mel spectrogram, and the quantized waveform shifted by
            one step.
        """
        # load data with trimming and normalizing
        # (sr / top_db / y are passed by keyword: librosa >= 0.10 made them
        # keyword-only, and the keyword form works on older versions too)
        raw, _ = librosa.load(path, sr=self.sr, res_type='kaiser_fast')
        raw, _ = librosa.effects.trim(raw, top_db=self.top_db)
        # NOTE(review): an all-zero signal would divide by zero here.
        raw /= numpy.abs(raw).max()
        raw = raw.astype(numpy.float32)

        # mu-law transform
        quantized = self.mu_law.transform(raw)

        # padding/trimming
        if self.length is not None:
            if len(raw) <= self.length:
                # padding
                pad = self.length - len(raw)
                raw = numpy.concatenate(
                    (raw, numpy.zeros(pad, dtype=numpy.float32)))
                quantized = numpy.concatenate(
                    (quantized, self.quantize // 2 * numpy.ones(pad)))
                quantized = quantized.astype(numpy.int32)
            else:
                # trimming: pick a random window of self.length samples
                start = random.randint(0, len(raw) - self.length - 1)
                raw = raw[start:start + self.length]
                quantized = quantized[start:start + self.length]

        # calculate mel-spectrogram
        spectrogram = librosa.feature.melspectrogram(
            y=raw, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length,
            n_mels=self.n_mels)
        spectrogram = librosa.power_to_db(
            spectrogram, ref=numpy.max)

        # rescale the dB spectrogram as (dB + 40) / 40
        spectrogram += 40
        spectrogram /= 40
        if self.length is not None:
            spectrogram = spectrogram[:, :self.length // self.hop_length]
        spectrogram = spectrogram.astype(numpy.float32)

        # expand dimensions
        one_hot = numpy.identity(
            self.quantize, dtype=numpy.float32)[quantized]
        one_hot = numpy.expand_dims(one_hot.T, 2)
        spectrogram = numpy.expand_dims(spectrogram, 2)
        quantized = numpy.expand_dims(quantized, 1)

        return one_hot[:, :-1], spectrogram, quantized[1:]

Example #9

0

Show file

File: data.py Project: adarob/magenta

def wav_to_spec(wav_audio, hparams):
  """Converts wav file contents into spectrogram frames per `hparams.spec_type`.

  'raw' returns framed samples directly; 'cqt' and 'mel' spectrograms are
  additionally converted to dB when `hparams.spec_log_amplitude` is set.
  Any other spec_type raises ValueError.
  """
  spec_type = hparams.spec_type

  # Raw framed samples never get the log-amplitude conversion.
  if spec_type == 'raw':
    return _wav_to_framed_samples(wav_audio, hparams)

  if spec_type == 'cqt':
    spec = _wav_to_cqt(wav_audio, hparams)
  elif spec_type == 'mel':
    spec = _wav_to_mel(wav_audio, hparams)
  else:
    raise ValueError('Invalid spec_type: {}'.format(spec_type))

  if hparams.spec_log_amplitude:
    spec = librosa.power_to_db(spec)

  return spec

Example #10

0

Show file

File: App.py Project: HuiChangZhai/huichangzhai.github.io

def melspectrogram(filename):
    """Load ``filename`` and show its mel power spectrogram on a dB scale.

    Args:
        filename: path of an audio file readable by ``librosa.load``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    import librosa
    # specshow lives in a submodule that older librosa releases do not
    # import automatically with `import librosa`.
    import librosa.display

    y, sr = librosa.load(filename)

    # Power spectrogram -> mel spectrogram. The original also ran
    # melspectrogram(y=y, sr=sr) first and threw the result away.
    D = np.abs(librosa.stft(y)) ** 2
    S = librosa.feature.melspectrogram(S=D)

    import matplotlib.pyplot as plt
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                             y_axis='mel', fmax=8000,
                             x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    plt.show()

Example #11

0

Show file

File: audio_transforms.py Project: rhyolight/nupic.research

 def __call__(self, data):
     """Add a dB-scaled mel spectrogram computed from ``data['samples']``.

     Reads ``data['samples']`` and ``data['sample_rate']``, stores the result
     under ``data['mel_spectrogram']`` and returns the same ``data`` mapping.
     """
     samples = data['samples']
     sample_rate = data['sample_rate']
     # `y=` keyword: librosa >= 0.10 no longer accepts the audio positionally.
     s = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=self.n_mels)
     data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
     return data

Example #12

0

Show file

File: PlottingFeatures.py Project: Jiaqi-knight/Acoustics-Instruments


# Seventh panel of the 4x2 grid: mel spectrogram of the audio file.
fig.add_subplot(4,2,7)
# The string literal below is a disabled matplotlib-specgram variant; it is
# evaluated as an expression and discarded, so it has no effect.
'''
Pxx, freqs, bins, im = plt.specgram(audData, Fs=rate, NFFT=1024, cmap=plt.get_cmap('autumn_r'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
cbar.set_label('Intensity dB')
'''
y, sr = librosa.load(temp_folder)
# NOTE(review): the result of this call is not stored.
librosa.feature.melspectrogram(y=y, sr=sr)
D = np.abs(librosa.stft(y))**2
# NOTE(review): this S is overwritten by the next assignment before use.
S = librosa.feature.melspectrogram(S=D, sr=sr)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=sr/2)
# Convert power to dB relative to the peak value
S_dB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_dB, x_axis='time',y_axis='mel', sr=sr, fmax=sr/2)
#plt.yticks(np.arange(0, 10000, step=1000))
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')



plt.show()
plt.close()


# Start a new, larger figure for the plots that follow
fig = plt.figure(figsize=(20, 20))
plt.subplots_adjust(hspace = 0.2, wspace = 0.2)

Example #13

0

Show file

File: utils.py Project: cghawthorne/magenta

def specgram(audio,
             n_fft=512,
             hop_length=None,
             mask=True,
             log_mag=True,
             re_im=False,
             dphase=True,
             mag_only=False):
  """Spectrogram using librosa.

  Args:
    audio: 1-D array of float32 sound samples.
    n_fft: Size of the FFT.
    hop_length: Stride of FFT. Defaults to n_fft/2.
    mask: Mask the phase derivative by the magnitude. Only applied when
      log_mag is also set.
    log_mag: Use the logamplitude. Otherwise the magnitude is divided by its
      maximum.
    re_im: Output Real and Imag. instead of logMag and dPhase.
    dphase: Use derivative of phase instead of phase.
    mag_only: Don't return phase.

  Returns:
    specgram: [n_fft/2 + 1, audio.size / hop_length, 2]. The first channel is
      the logamplitude and the second channel is the derivative of phase.
      With re_im the two channels are the real and imaginary parts. With
      mag_only only the magnitude is returned (see the note in the code about
      its trailing axes).
  """
  # Covers both None and 0.
  if not hop_length:
    hop_length = int(n_fft / 2.)

  fft_config = dict(
      n_fft=n_fft, win_length=n_fft, hop_length=hop_length, center=True)

  spec = librosa.stft(audio, **fft_config)

  if re_im:
    # Stack real and imaginary parts along a new last axis
    re = spec.real[:, :, np.newaxis]
    im = spec.imag[:, :, np.newaxis]
    spec_real = np.concatenate((re, im), axis=2)

  else:
    mag, phase = librosa.core.magphase(spec)
    phase_angle = np.angle(phase)

    # Magnitudes, scaled 0-1
    if log_mag:
      # dB relative to the peak, clipped to 120 dB, then mapped by dB/120 + 1
      mag = (librosa.power_to_db(
          mag**2, amin=1e-13, top_db=120., ref=np.max) / 120.) + 1
    else:
      mag /= mag.max()

    if dphase:
      #  Derivative of phase
      phase_unwrapped = np.unwrap(phase_angle)
      p = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
      # Keep the first unwrapped frame so the frame count is unchanged
      p = np.concatenate([phase_unwrapped[:, 0:1], p], axis=1) / np.pi
    else:
      # Normal phase
      p = phase_angle / np.pi
    # Mask the phase
    if log_mag and mask:
      p = mag * p
    # Return Mag and Phase
    p = p.astype(np.float32)[:, :, np.newaxis]
    mag = mag.astype(np.float32)[:, :, np.newaxis]
    if mag_only:
      # NOTE(review): mag already received a new trailing axis on the line
      # above, so this indexing adds a second one (assuming `spec` is 2-D the
      # result is 4-D) -- confirm callers expect that shape.
      spec_real = mag[:, :, np.newaxis]
    else:
      spec_real = np.concatenate((mag, p), axis=2)
  return spec_real

Example #14

0

Show file

File: svm.py Project: sandeshdevadiga/EQ_FiltersandAI

# Accuracy of the classifier on the PCA-projected test set
test_preds = classifier.predict(test_pca)
test_acc = np.sum(test_preds == y_te)
test_acc = test_acc / len(y_te)
scale_file = "debussy2ms.wav"
# NOTE(review): the sample rate returned here (`sr`) is not used below; the
# feature extraction uses the SR constant instead -- confirm they agree.
scale, sr = lb.load(scale_file)

#S_scale = librosa.stft(scale, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)

# `y=` keyword: librosa >= 0.10 no longer accepts the audio positionally.
mel_spectrogram_TestSong = lb.feature.melspectrogram(y=scale,
                                                     sr=SR,
                                                     n_fft=N_FFT,
                                                     hop_length=HOP_LENGTH,
                                                     n_mels=N_MELS)
#trained = scaler.transform(mel_spectrogram_TestSong)

# NOTE(review): melspectrogram is called with its default `power`, and the
# result is squared again here before the dB conversion -- confirm intended.
melspectrogram_TestSong = lb.power_to_db(mel_spectrogram_TestSong**2)
# Flatten to a single row
melspectrogram_TestSong = melspectrogram_TestSong.reshape(
    1, melspectrogram_TestSong.shape[0] * melspectrogram_TestSong.shape[1])

print("Shape that i pass to predicter", melspectrogram_TestSong.shape)
# NOTE(review): this scaler is fitted on the single row it then transforms.
scaler1 = StandardScaler()
scaler1.fit(melspectrogram_TestSong)
melspectrogram_TestSong = scaler1.transform(melspectrogram_TestSong)

#pca2 = PCA(n_components = 1)
#pca2.fit(melspectrogram_TestSong)
#mel_spectrogram_TestSong = pca2.transform(melspectrogram_TestSong)

# The PCA step above is commented out, so this prints the scaled shape.
print("after pca", melspectrogram_TestSong.shape)

melspectrogram_TestSong.shape

Example #15

0

Show file

File: DetectionOnsetChroma.py Project: gamaievsky/DescripteursHarmoniquesAudio

def detectionOnsets(y):
    """Detect onsets in ``y`` from frame-to-frame changes of its CQT (in dB).

    Relies on names defined outside this function: Notemin, Notemax,
    BINS_PER_OCTAVE, sr, STEP, H, ALPHA, BETA, T, T_att, norm_spectre,
    plot_onsets and the ``stat`` module.

    Args:
        y: audio signal passed to ``librosa.cqt``.

    Returns:
        Onset times computed with ``librosa.frames_to_time`` from the
        selected onset frames.
    """
    fmin = librosa.note_to_hz(Notemin)
    fmax = librosa.note_to_hz(Notemax)
    #Nmin = int((sr/(fmax*(2**(1/BINS_PER_OCTAVE)-1))))
    #Nmax = int((sr/(fmin*(2**(1/BINS_PER_OCTAVE)-1))))
    n_bins = int(
        (librosa.note_to_midi(Notemax) - librosa.note_to_midi(Notemin)) *
        BINS_PER_OCTAVE / 12)
    # CQT magnitude in dB relative to its maximum
    Chrom = librosa.amplitude_to_db(np.abs(
        librosa.cqt(y=y,
                    sr=sr,
                    hop_length=STEP,
                    fmin=fmin,
                    bins_per_octave=BINS_PER_OCTAVE,
                    n_bins=n_bins)),
                                    ref=np.max)
    Nf = len(Chrom)
    N = len(Chrom[0])
    # Diff: absolute change of each bin between consecutive frames;
    # Dev: sum of Diff over all bins for each frame.
    Diff = np.zeros((Nf, N))
    Dev = np.zeros(N)
    for j in range(1, N):
        for i in range(Nf):
            Diff[i, j] = np.abs(Chrom[i, j] - Chrom[i, j - 1])
            # NOTE(review): recomputed on every inner iteration; only the
            # value from the last i is kept.
            Dev[j] = sum(Diff[:, j])

    # THRESHOLD FUNCTION
    # Pad Dev with int(H / 2) zeros at the head and at the tail
    l = []
    Seuil = []
    Onsets = []
    for k in range(int(H / 2)):
        l.append(0)
    for val in Dev:
        l.append(val)
    for k in range(int(H / 2)):
        l.append(0)
    # Threshold = ALPHA + BETA * median over a window of H padded values;
    # a frame is an onset candidate when Dev exceeds its threshold.
    for i in range(N):
        Seuil.append(ALPHA + BETA * stat.median(l[i:i + H]))
        if Dev[i] > Seuil[i]:
            Onsets.append(i)

    times = librosa.frames_to_time(np.arange(N), sr=sr, hop_length=STEP)

    # ONSET FILTERING
    # When two consecutive candidates are less than T apart, delete the one
    # with the smaller Dev and re-check the same position.
    i = 0
    while i < (len(Onsets) - 1):
        while (i < (len(Onsets) - 1)) and (times[Onsets[i + 1]] <
                                           times[Onsets[i]] + T):
            if Dev[Onsets[i + 1]] < Dev[Onsets[i]]: del Onsets[i + 1]
            else: del Onsets[i]
        i = i + 1

    onset_frames = librosa.util.fix_frames(Onsets,
                                           x_min=0,
                                           x_max=Chrom.shape[1] - 1)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=STEP)

    # Synchronise on the onsets, dropping n_att frames at the start and end
    # of each inter-onset segment before averaging.
    # NOTE(review): a segment of at most 2 * n_att frames gives an empty
    # slice here -- confirm how that case should be handled.
    ChromSync = np.zeros((Nf, len(onset_frames) - 1))
    n_att = int(librosa.time_to_frames(T_att, sr=sr, hop_length=STEP))
    for j in range(len(onset_frames) - 1):
        for i in range(Nf):
            ChromSync[i, j] = np.mean(Chrom[i][(onset_frames[j] +
                                                n_att):(onset_frames[j + 1] -
                                                        n_att)])

    # Spectrum normalisation


    # ChromSync[:,1] = librosa.power_to_db(librosa.db_to_power(ChromSync[:,1]) / np.sum(librosa.db_to_power(ChromSync[:,1])))
    if norm_spectre:
        # Normalise each column so that its power values sum to one
        for j in range(ChromSync.shape[1]):
            ChromSync[:, j] = librosa.power_to_db(
                librosa.db_to_power(ChromSync[:, j]) /
                np.sum(librosa.db_to_power(ChromSync[:, j])))

    # Display
    if plot_onsets:
        plt.figure(figsize=(13, 7))
        # Top panel: CQT in dB
        ax1 = plt.subplot(3, 1, 1)
        librosa.display.specshow(Chrom,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=times)
        plt.title('CQT spectrogram')

        # Middle panel: deviation, threshold and selected onsets
        plt.subplot(3, 1, 2, sharex=ax1)
        plt.plot(times, Dev, label='Deviation')
        plt.plot(times, Seuil, color='g', label='Seuil')
        plt.vlines(times[Onsets],
                   0,
                   Dev.max(),
                   color='r',
                   alpha=0.9,
                   linestyle='--',
                   label='Onsets')
        plt.axis('tight')
        plt.legend(frameon=True, framealpha=0.75)

        # Bottom panel: onset-synchronised spectrum
        ax1 = plt.subplot(3, 1, 3, sharex=ax1)
        librosa.display.specshow(ChromSync,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=onset_times)
        plt.show()

    return onset_times

Example #16

0

Show file

def get_log_spectrum(x):
    """Return the STFT power spectrogram of ``x`` in dB (n_fft 2048, hop 512)."""
    stft = librosa.core.stft(x, n_fft=2048, win_length=2048, hop_length=512)
    power = np.abs(stft)**2
    return librosa.power_to_db(power)

Example #17

0

Show file

def test_melspectrogram_correctness(
    n_fft, sr, hop_length, n_ch, data_format, amin, dynamic_range, n_mels, mel_f_min, mel_f_max
):
    """Test the correctness of melspectrogram.

    The layer output is compared against a librosa reference, first on the
    plain mel spectrogram and then on its decibel-scaled version.

    Note that mel filterbank is tested separated

    """

    def _get_melgram_model(return_decibel, amin, dynamic_range, input_shape=None):
        """Build the mel-spectrogram layer under test.

        `win_length` is read from the enclosing scope; it is assigned below,
        before this helper is first called.
        """
        # compute with kapre
        melgram_model = get_melspectrogram_layer(
            n_fft=n_fft,
            sample_rate=sr,
            n_mels=n_mels,
            mel_f_min=mel_f_min,
            mel_f_max=mel_f_max,
            win_length=win_length,
            hop_length=hop_length,
            input_data_format=data_format,
            output_data_format=data_format,
            return_decibel=return_decibel,
            input_shape=input_shape,
            db_amin=amin,
            db_dynamic_range=dynamic_range,
        )
        return melgram_model

    src_mono, batch_src, input_shape = get_audio(data_format=data_format, n_ch=n_ch)

    win_length = n_fft  # test with x2
    # compute with librosa
    # (`y=` keyword: librosa >= 0.10 no longer accepts the audio positionally)
    S_ref = librosa.feature.melspectrogram(
        y=src_mono,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        center=False,
        power=1.0,
        n_mels=n_mels,
        fmin=mel_f_min,
        fmax=mel_f_max,
    ).T

    S_ref = np.expand_dims(S_ref, axis=2)  # time, freq, ch=1
    S_ref = np.tile(S_ref, [1, 1, n_ch])  # time, freq, ch=n_ch

    if data_format == 'channels_first':
        S_ref = np.transpose(S_ref, (2, 0, 1))  # ch, time, freq

    # melgram
    melgram_model = _get_melgram_model(
        return_decibel=False, input_shape=input_shape, amin=None, dynamic_range=120.0
    )
    S = melgram_model.predict(batch_src)[0]  # 3d representation
    np.testing.assert_allclose(S_ref, S, atol=1e-4)

    # log melgram
    melgram_model = _get_melgram_model(
        return_decibel=True, input_shape=input_shape, amin=amin, dynamic_range=dynamic_range
    )
    S = melgram_model.predict(batch_src)[0]  # 3d representation
    S_ref_db = librosa.power_to_db(S_ref, ref=1.0, amin=amin, top_db=dynamic_range)

    np.testing.assert_allclose(
        S_ref_db, S, rtol=3e-3
    )  # decibel is evaluated with relative tolerance

Example #18

0

Show file

File: audio_spectrogram.py Project: ZackStrater/perfect-pitch-ai

    sr=sr,
    n_mels=128,
)
# Linear-frequency spectrogram: STFT magnitude in dB relative to its peak
fig, ax = plt.subplots(figsize=(30, 10))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
img = librosa.display.specshow(D,
                               y_axis='linear',
                               x_axis='time',
                               sr=sr,
                               ax=ax,
                               cmap='viridis')
# NOTE(review): plt.figure() opens a second, separate figure; the dpi given
# here does not apply to the figure drawn above -- confirm intent.
plt.figure(dpi=1200)
plt.show()

# Mel spectrogram: power in dB relative to its peak
fig, ax = plt.subplots()
S_dB = librosa.power_to_db(S, ref=np.max)
img = librosa.display.specshow(S_dB,
                               x_axis='time',
                               y_axis='mel',
                               sr=sr,
                               ax=ax,
                               cmap='viridis')
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('Mel-Frequency Spectrogram', size=20)
ax.set_ylabel('log Hz', size=15)
ax.set_xlabel('Time', size=15)

plt.show()

# NOTE(review): second consecutive plt.show() with no new figure in between.
plt.show()

Example #19

0

Show file

def getAudioSamples(fn,
                    min_dur=50,
                    max_dur=-1,
                    fft=2048,
                    hop_length=512,
                    backtrack=True,
                    superFlux=True,
                    y=None,
                    sr=None,
                    delta=0.07):
    """Split an audio file into samples at detected onsets.

    Args:
        fn: path of the audio file; its base name is stored in each sample.
        min_dur: minimum sample duration (ms) for a sample to be kept.
        max_dur: maximum sample duration (ms); values <= 0 disable the cap.
        fft: FFT size for the mel spectrogram (superflux method only).
        hop_length: hop length used for onset detection.
        backtrack: forwarded to ``librosa.onset.onset_detect``.
        superFlux: use the superflux onset-strength method instead of the
            librosa default.
        y: already-loaded audio data; loaded from ``fn`` when ``y`` or ``sr``
            is None.
        sr: sample rate belonging to ``y``.
        delta: forwarded to ``librosa.onset.onset_detect``.

    Returns:
        Tuple ``(samples, y, sr)`` where ``samples`` is a list of dicts with
        keys ``filename``, ``start`` and ``dur`` (both in milliseconds).
        ``samples`` is empty when the audio could not be loaded or has no
        duration.
    """
    basename = os.path.basename(fn)
    fn = getAudioFile(fn)
    duration = 0

    # load audio
    if y is None or sr is None:
        try:
            y, sr = loadAudioData(fn)
            duration = int(getDurationFromAudioData(y, sr) * 1000)
        except audioop.error:
            duration = 0
            y = None
            sr = None
    else:
        # Audio supplied by the caller. Previously `duration` stayed 0 on
        # this path, so the function always returned an empty sample list.
        duration = int(getDurationFromAudioData(y, sr) * 1000)

    # maxVal = y.max()
    # if maxVal != 0:
    #     y /= maxVal

    if duration <= 0:
        return ([], y, sr)

    # retrieve onsets using superflux method
    # https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py
    # http://dafx13.nuim.ie/papers/09.dafx2013_submission_12.pdf
    if superFlux:
        lag = 2
        n_mels = 138
        fmin = 27.5
        fmax = 16000.0
        max_size = 3
        # `y=` keyword: librosa >= 0.10 no longer accepts audio positionally.
        S = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_fft=fft,
                                           hop_length=hop_length,
                                           fmin=fmin,
                                           fmax=fmax,
                                           n_mels=n_mels)
        odf = librosa.onset.onset_strength(S=librosa.power_to_db(S,
                                                                 ref=np.max),
                                           sr=sr,
                                           hop_length=hop_length,
                                           lag=lag,
                                           max_size=max_size)
        onsets = librosa.onset.onset_detect(onset_envelope=odf,
                                            sr=sr,
                                            hop_length=hop_length,
                                            backtrack=backtrack,
                                            delta=delta)

    # retrieve onsets using default method
    else:
        onsets = librosa.onset.onset_detect(y=y,
                                            sr=sr,
                                            hop_length=hop_length,
                                            backtrack=backtrack,
                                            delta=delta)

    # onset frame indices -> milliseconds
    times = [
        int(round(1.0 * hop_length * onset / sr * 1000)) for onset in onsets
    ]
    # add the end of the audio
    times.append(duration - 1)

    # Each pair of consecutive times delimits one candidate sample.
    samples = []
    for prev, t in zip(times, times[1:]):
        dur = t - prev
        if max_dur > 0 and dur > max_dur:
            dur = max_dur
        if dur >= min_dur:
            samples.append({
                "filename": basename,
                "start": prev,
                "dur": dur
            })

    return (samples, y, sr)

Example #20

0

Show file

File: Mel_Spectrogram_Extraction_Peter_Dataset.py Project: Jiangbin713/MSc-DL-ML-for-Heart-Sounds

                temp_signal = signal[:3 * Config.sr]  # take 3s chunks
                mask[:int(3 * Config.sr -
                          1)] = False  # go forward mask out the first 3s chunk
                signal = signal[mask]

                #compute mel-spectrogram
                mel_spec = librosa.feature.melspectrogram(
                    temp_signal,
                    sr=Config.sr,
                    n_fft=Config.n_fft,
                    hop_length=Config.hop_length,
                    n_mels=Config.n_mels,
                    fmin=Config.fmin,
                    fmax=Config.fmax)
                # compute log mel spectrogram
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # normalization
                norm_log_mel_spec = librosa.util.normalize(log_mel_spec)

                #plot and save plot
                plt.figure(figsize=Config.figsize, dpi=Config.dpi)

                librosa.display.specshow(norm_log_mel_spec,
                                         fmin=Config.fmin,
                                         fmax=Config.fmax,
                                         sr=Config.sr,
                                         hop_length=Config.hop_length,
                                         cmap=Config.color)

                fig = plt.gcf()

Example #21

0

Show file

File: feature.py Project: wataru129/GMM

def feature_extraction(y,
                       fs=44100,
                       statistics=True,
                       include_mfcc0=True,
                       include_delta=True,
                       include_acceleration=True,
                       mfcc_params=None,
                       delta_params=None,
                       acceleration_params=None):
    """Extract MFCC features, optionally with delta and acceleration.

    Args:
        y: audio samples passed to ``librosa.stft``.
        fs: sampling rate used to build the mel filterbank.
        statistics: if True, also return summary statistics of the features.
        include_mfcc0: if False, drop the first row of the stacked features.
        include_delta: append first-order delta coefficients.
        include_acceleration: append second-order delta coefficients.
        mfcc_params: mapping with the keys ``n_fft``, ``hop_length``,
            ``n_mels``, ``fmin``, ``fmax``, ``htk`` and ``n_mfcc``. Required.
        delta_params: keyword arguments for the first-order
            ``librosa.feature.delta`` call; None uses the library defaults.
        acceleration_params: keyword arguments for the second-order
            ``librosa.feature.delta`` call; None uses the library defaults.

    Returns:
        ``{'feat': feature_matrix}``, where ``feature_matrix`` is the
        transposed stack of coefficients. When ``statistics`` is True a
        ``'stat'`` dict is added with ``mean``, ``std``, ``N``, ``S1`` and
        ``S2`` computed over axis 0.

    Raises:
        TypeError: if ``mfcc_params`` is None.
    """
    if mfcc_params is None:
        # Same exception type as the subscript on None raised before, with
        # a message that names the missing argument.
        raise TypeError('mfcc_params is required')

    eps = numpy.spacing(1)
    # Window function. scipy.signal.hamming was removed in SciPy 1.13; the
    # scipy.signal.windows location also exists in older releases.
    window = scipy.signal.windows.hamming(mfcc_params['n_fft'], sym=False)
    # Compute the static coefficients
    # librosa.stft        -> short-time Fourier transform
    # librosa.filters.mel -> builds the mel filterbank
    power_spectrogram = numpy.abs(
        librosa.stft(
            y + eps,
            n_fft=mfcc_params['n_fft'],
            #win_length=mfcc_params['win_length'],
            hop_length=mfcc_params['hop_length'],
            center=True,
            window=window))**2
    mel_basis = librosa.filters.mel(sr=fs,
                                    n_fft=mfcc_params['n_fft'],
                                    n_mels=mfcc_params['n_mels'],
                                    fmin=mfcc_params['fmin'],
                                    fmax=mfcc_params['fmax'],
                                    htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, power_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrum),
                                n_mfcc=mfcc_params['n_mfcc'])
    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients (first derivative), appended to the features.
        # None params previously crashed on ** unpacking.
        mfcc_delta = librosa.feature.delta(mfcc, **(delta_params or {}))
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))
    if include_acceleration:
        # Acceleration coefficients (second derivative), appended likewise
        mfcc_delta2 = librosa.feature.delta(mfcc,
                                            order=2,
                                            **(acceleration_params or {}))
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))
    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]
    feature_matrix = feature_matrix.T
    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix**2, axis=0),
            }
        }
    else:
        return {'feat': feature_matrix}

Example #22

0

Show file

File: offline_process.py Project: dsalaj/common-voice-tf

                  desc=label):
 f = row['path']
 try:
     data, sr = lr.load(os.path.join(root, label, 'clips', f),
                        sr=SR,
                        mono=True,
                        dtype=np.float32,
                        res_type='kaiser_fast')
     data, _ = lr.effects.trim(
         data)  # trim leading and trailing silence
     mel_specgram = lr.feature.melspectrogram(
         data,
         n_mels=64,
         hop_length=hop_in_samples,
         n_fft=n_fft)
     mfcc = lr.feature.mfcc(S=lr.power_to_db(mel_specgram),
                            sr=SR,
                            n_mfcc=n_channels,
                            n_dim=1)
     # plt.imshow(mfcc.T, cmap='viridis', aspect='auto')
     # plt.savefig('MFCC_test_{}.png'.format(label))
     if METHOD is 'h5':
         writer.create_dataset(str(idx), data=mfcc.reshape(-1))
     elif METHOD is 'tfrecord':
         mfcc_feature = tf.train.Feature(
             float_list=tf.train.FloatList(
                 value=mfcc.reshape(-1).tolist()))
         tf_label = tf.train.Feature(
             bytes_list=tf.train.BytesList(
                 value=[label.encode('utf-8')]))
         age = '' if type(row['age']) == float else row[

Example #23

0

Show file

 def power_to_db(self, S, ref=1.0, amin=1e-10, top_db=80.0):
     """Forward all arguments unchanged to ``librosa.power_to_db``."""
     return librosa.power_to_db(S, ref=ref, amin=amin, top_db=top_db)

Example #24

0

Show file

        spec_augment_tensorflow.visualization_spectrogram(
            warped_masked_spectrogram, 'after')

        # Number of elements in the 2-D spectrogram; track the largest seen
        shape = warped_masked_spectrogram.shape[
            0] * warped_masked_spectrogram.shape[1]
        if shape > max_shape:
            max_shape = shape
        x_train[count] = [0] * shape
        # Copy the spectrogram row by row into x_train[count]; `shape` is
        # reused as the running write position.
        shape = 0
        for i in range(warped_masked_spectrogram.shape[0]):
            for j in range(warped_masked_spectrogram.shape[1]):
                x_train[count][shape] = warped_masked_spectrogram[i][j]
                # Was `shape - shape + 1`, an expression with no effect, so
                # every value was written to position 0.
                shape = shape + 1
        count = count + 1

        librosa.display.specshow(librosa.power_to_db(melspec, ref=np.max))
        pylab.savefig(save_path, bbox_inches=None, pad_inches=0)
        pylab.close()

x_train = x_train.reshape(1, num_of_files, max_shape)

labels = np.zeros((1, num_of_files, 1))
for i in range(num_of_files):
    labels[0][i][0] = random.randint(0, 9)
    continue

from AudioDataGenerator import AudioDataGenerator

datagen = AudioDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True,
                             shift=.2,

Example #25

0

Show file

File: dataset.py Project: zhilangtaosha/VoiceConversion

    def _normalize(audio):
        """Convert ``audio`` to dB relative to its own maximum, then rescale.

        The dB values are shifted up by 80 and divided by 80.
        NOTE(review): presumably this maps the result into [0, 1], relying on
        librosa's default 80 dB dynamic-range clipping -- confirm.
        """
        decibels = librosa.power_to_db(audio, ref=np.max)
        return (decibels + 80) / 80

Example #26

0

Show file

File: script1.py Project: aakash30jan/pyMusic

# ```
# librosa.load(audio_path, sr=None)
# ```
# to disable resampling.

# # Mel spectrogram
# This first step will show how to compute a [Mel](http://en.wikipedia.org/wiki/Mel_scale) spectrogram from an audio waveform.

# In[4]:


# Let's make and display a mel-scaled power (energy-squared) spectrogram
S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

# Convert to log scale (dB). We'll use the peak power (max) as reference.
log_S = librosa.power_to_db(S, ref=np.max)

# Make a new figure
plt.figure(figsize=(12,4))

# Display the spectrogram on a mel scale
# sample rate and hop length parameters are used to render the time axis
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

# Put a descriptive title on the plot
plt.title('mel power spectrogram')

# draw a color bar
plt.colorbar(format='%+02.0f dB')

# Make the figure layout compact

Example #27

0

Show file

plt.subplot(211)
plt.title('Spectrogram')
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
librosa.display.specshow(D, x_axis='time', y_axis='log')

plt.subplot(212)
plt.title('Audioform')
librosa.display.waveplot(y, sr=sr)

librosa.feature.melspectrogram(y=X, sr=sample_rate)

D = np.abs(librosa.stft(X))**2
S = librosa.feature.melspectrogram(S=D)
S = librosa.feature.melspectrogram(y=X, sr=sample_rate, n_mels=128,fmax=8000)
plt.figure(figsize=(10, 4))
librosa.display.specshow(librosa.power_to_db(S,ref=np.max),y_axis='mel', fmax=8000,x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

y_fast = librosa.effects.time_stretch(X, 2.0)
time = np.arange(0,len(y_fast))/sample_rate
fig, ax = plt.subplots()
ax.plot(time,y_fast)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()#compress to be twice as fast

y_slow = librosa.effects.time_stretch(X, 0.5)
time = np.arange(0,len(y_slow))/sr
fig, ax = plt.subplots()

Example #28

0

Show file

def save_mel_spectrogram(dataset_path,
                         json_path,
                         num_segments,
                         n_fft=2048,
                         hop_length=512):
    """Extracts MELs from music dataset and saves them into a json file along with chord labels.

      Every ``.wav`` file found directly in ``dataset_path`` is loaded at
      ``SAMPLE_RATE`` and cut into ``num_segments`` consecutive segments of
      ``SAMPLES_PER_TRACK / num_segments`` samples. Each segment is converted
      to a dB-scaled mel spectrogram (133 mel bands, Hann window) and stored
      together with the integer label of the chord at the same position in
      the label file ``NON_GUITAR_LABEL_PATH``.

      :param dataset_path (str): Path to dataset
      :param json_path (str): Path to json file used to save MELs
      :param num_segments (int): Number of segments we want to divide sample tracks into
      :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
      :param hop_length (int): Sliding window for FFT. Measured in # of samples
      :return: None; the result is written to ``json_path``
      """

    # dictionary to store mapping, labels, and MELs
    data = {"mapping": [], "labels": [], "MEL": []}

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mel_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # generating label list (third whitespace-separated column of the file)
    label_list = pd.read_csv(NON_GUITAR_LABEL_PATH,
                             delimiter=r'\s+',
                             index_col=False,
                             header=None)
    label_list = label_list[2].tolist()

    # generating corresponding mapping: each distinct chord gets the next
    # free integer, in order of first appearance
    voicing = {}
    for chord in label_list:
        voicing.setdefault(chord, len(voicing))

    data["mapping"].append(list(voicing))

    # loop through instrument samples
    for instrument in os.listdir(dataset_path):

        # handling the audio files
        if not instrument.endswith(".wav"):
            continue

        sample_path = os.path.join(dataset_path, instrument)
        signal, sample_rate = librosa.load(sample_path, sr=SAMPLE_RATE)

        # segmenting sample into its constituent 2 second chord voicing
        for s in range(num_segments):

            # calculating start and end sample for each chord voicing
            start = s * samples_per_segment
            end = start + samples_per_segment

            # extract log spaced frequency, log amplitude mel spectogram.
            # The sample rate returned by ``librosa.load`` and the caller's
            # ``n_fft`` are used here; the previous code referenced an
            # undefined ``sr`` and hard-coded ``n_fft=2048``.
            segment = signal[start:end]
            spectrogram = librosa.feature.melspectrogram(
                y=segment,
                hop_length=hop_length,
                n_fft=n_fft,
                sr=sample_rate,
                n_mels=133,
                window="hann")
            mel_spectrogram = librosa.power_to_db(spectrogram).T

            # store only spectrogram with expected number of vectors & append corresponding label
            if len(mel_spectrogram) == num_mel_vectors_per_segment:
                data["MEL"].append(mel_spectrogram.tolist())
                print("{}, chord:{}".format(sample_path, s + 1))
                data["labels"].append(voicing[label_list[s]])

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

Example #29

0

Show file

    # Walk the two-level directory tree under ``loadpath`` and write one CSV
    # of dB-scaled mel features per audio file into the same relative
    # location under ``savepath``.
    for indexA in os.listdir(loadpath):
        for indexB in os.listdir(loadpath + indexA):
            os.makedirs(savepath + indexA + '\\' + indexB)
            source_dir = loadpath + indexA + '\\' + indexB
            for indexC in os.listdir(source_dir):
                print(indexA, indexB, indexC)
                y, sr = librosa.load(source_dir + '\\' + indexC, sr=16000)

                # Power spectrogram -> mel bands -> dB relative to the peak.
                D = numpy.abs(
                    librosa.stft(y,
                                 n_fft=n_fft,
                                 win_length=win_length,
                                 hop_length=hop_length,
                                 window=signal.hamming,
                                 center=False))**2
                S = librosa.feature.melspectrogram(S=D, n_mels=m_bands)
                gram = librosa.power_to_db(S, ref=numpy.max)
                # Swap the axes so each CSV line holds one column of ``S``.
                gram = numpy.transpose(gram, (1, 0))

                csv_path = (savepath + indexA + '\\' + indexB + '\\' +
                            indexC + '.csv')
                # ``with`` guarantees the handle is closed even if a write
                # fails part-way through.
                with open(csv_path, 'w') as csv_file:
                    for row in gram:
                        csv_file.write(','.join(str(value) for value in row))
                        csv_file.write('\n')

Example #30

0

Show file

File: main.py Project: gegetang/Deep-Learning-Playlist-Recommendation

        i.label = 0

# Split data up into sets that will feed into the network
data = []
labels = []

# Getting Input Mel-Spectrograms from music to use in Deep Learning
for i in training:
    ## 256 Mels ## 20s = 862 Frames
    y, sr = librosa.core.load(library_path + "/" + i.name)
    segment = y[60 * sr:80 * sr]
    spectrogram = librosa.feature.melspectrogram(y=segment,
                                                 sr=sr,
                                                 n_fft=2048,
                                                 n_mels=256)
    log_spectro = librosa.power_to_db(spectrogram**2, ref=1.0)
    i.spectrogram = log_spectro
    x = 0
    z = 21
    for j in range(41):
        temp = log_spectro[0:256, x:z]
        data.append(temp)
        label = i.label
        labels.append(label)
        x = z
        z += 21

for i in testing:
    ## 256 Mels ## 20s = 862 Frames
    y, sr = librosa.core.load(library_path + "/" + i.name)
    segment = y[60 * sr:80 * sr]

Example #31

0

Show file

File: main.py Project: twistedmove/SpecAugment

    print('Number of Training Files: ', len(training_files))

    # Loop over files and apply SpecAugment
    for file in training_files:

        # Load the audio file
        audio, sr = librosa.load(file)

        # Extract Mel Spectrogram Features from the audio file
        mel_spectrogram = librosa.feature.melspectrogram(y=audio,
                                                         sr=sr,
                                                         n_mels=256,
                                                         hop_length=128,
                                                         fmax=8000)
        plt.figure(figsize=(14, 6))
        librosa.display.specshow(librosa.power_to_db(mel_spectrogram,
                                                     ref=np.max),
                                 x_axis='time',
                                 y_axis='mel',
                                 fmax=8000)  # Base

        # Apply SpecAugment
        apply = SpecAugment(mel_spectrogram, args.policy)

        time_warped = apply.time_warp(
        )  # Applies Time Warping to the mel spectrogram
        #plt.figure(figsize=(14, 6))
        #librosa.display.specshow(librosa.power_to_db(time_warped[0, :, :, 0].numpy(), ref=np.max), x_axis='time', y_axis='mel', fmax=8000) # Time Warped

        freq_masked = apply.freq_mask(
        )  # Applies Frequency Masking to the mel spectrogram

Example #32

0

Show file

File: mfcc_amplitude_normalization_test.py Project: QianQQ/Voice-Conversion

# Load waveforms
y, _ = librosa.load(filename, mono=True, sr=sr)

# Get spectrogram
D = librosa.stft(y=y,
                 n_fft=n_fft,
                 hop_length=hop_length,
                 win_length=win_length)
mag = np.abs(D)
scaled_mag = mag * 2

# Get mel-spectrogram
mel_basis = librosa.filters.mel(sr, n_fft, n_mels)  # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag ** 1)  # (n_mels, t) # mel spectrogram
scaled_mel = np.dot(mel_basis, scaled_mag ** 1)

# Get mfccs
db = librosa.power_to_db(mel)
scaled_db = librosa.power_to_db(scaled_mel)

mfccs = np.dot(librosa.filters.dct(n_mfcc, db.shape[0]), mel)
scaled_mfccs = np.dot(librosa.filters.dct(n_mfcc, db.shape[0]), scaled_mel)

mfccs = mfccs.T  # (t, n_mfccs)
scaled_mfccs = scaled_mfccs.T

assert(np.all(mfccs * 2 == scaled_mfccs))

print(mfccs)
print(scaled_mfccs)

Example #33

0

Show file

            harmonic = librosa.effects.harmonic(y, margin=8)
            chromagram = librosa.feature.chroma_cqt(y=harmonic, sr=sr)
            note, mode = key(chromagram)

            ##Beat_srength & Tempo
            onset_env = librosa.onset.onset_strength(y, sr=sr)
            tempo = librosa.beat.tempo(onset_envelope=onset_env,
                                       aggregate=None)

            ##Power & Loudness
            S = librosa.stft(y, center=False)
            power = np.abs(S)**2
            p_mean = np.sum(power, axis=0, keepdims=True)
            p_ref = np.max(
                power)  # or whatever other reference power you want to use
            loudness = librosa.power_to_db(p_mean, ref=p_ref)

            tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sr))

            #Artist, Title, Album extraction
            filename = filename.split('-')
            artist = filename[0]
            song = filename[1]
            if len(filename) > 2:
                album = filename[2]
            else:
                album = song
            artist = artist.split(',')
            album = album[:-4]
            song, album = song.strip(), album.strip()
            album, song = album.lower(), song.lower()

Example #34

0

Show file

File: generate_spec_200k.py Project: kokimame/OpenFSE

def get_melspec(spec, n_mels):
    """Return a dB-scaled mel spectrogram computed from ``spec``.

    The squared magnitude of ``spec`` is projected onto ``n_mels`` mel bands
    and converted to decibels relative to the maximum value.

    :param spec: spectrogram array; only its magnitude is used
    :param n_mels: number of mel bands to generate
    :return: the mel spectrogram in dB
    """
    # Power spectrum
    power_spectrum = np.abs(spec) ** 2
    mel_power = librosa.feature.melspectrogram(S=power_spectrum,
                                               n_mels=n_mels)
    # ``ref`` is passed by keyword: recent librosa releases accept it only
    # as a keyword argument, and older releases accept both forms.
    return librosa.power_to_db(mel_power, ref=np.max)

Example #35

0

Show file

File: visualization.py Project: andohuman/Shadowcol

                                             sr=RATE,
                                             n_mels=128,
                                             fmax=8000)

    plt.subplot(211)
    ax1.set_ylim(yrange)
    plt.plot(full)

    if args.vis == 'mfcc':
        plt.subplot(212)
        librosa.display.specshow(vis, x_axis='time')
        plt.colorbar()

    elif args.vis == 'spec':
        plt.subplot(212)
        librosa.display.specshow(librosa.power_to_db(vis, ref=np.max),
                                 y_axis='mel',
                                 fmax=8000,
                                 x_axis='time')
        plt.colorbar(format='%+2.0f dB')

    plt.pause(0.01)

    previous = data_int

    end = time.time()
    tot.append(end - start)
    print("Time taken =", end - start)

print("finished recording")
print("Total time =", sum(tot))

Example #36

0

Show file

File: test_core.py Project: dpwe/librosa

    def __test(y_true, x, rp):
        """Assert that ``x`` converted to dB with reference ``rp`` is close to ``y_true``."""
        # top_db=None is passed so the raw conversion is compared.
        converted = librosa.power_to_db(x, ref=rp, top_db=None)
        assert np.isclose(converted, y_true)

Example #37

0

Show file

File: plot_superflux.py Project: Monal415/librosa

fmax = 16000.
max_size = 3


########################################################
# The paper uses a log-frequency representation, but for
# simplicity, we'll use a Mel spectrogram instead.
S = librosa.feature.melspectrogram(y, sr=sr, n_fft=n_fft,
                                   hop_length=hop_length,
                                   fmin=fmin,
                                   fmax=fmax,
                                   n_mels=n_mels)


plt.figure(figsize=(6, 4))
librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                         y_axis='mel', x_axis='time', sr=sr,
                         hop_length=hop_length, fmin=fmin, fmax=fmax)
plt.tight_layout()


################################################################
# Now we'll compute the onset strength envelope and onset events
# using the librosa defaults.
odf_default = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
onset_default = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length,
                                           units='time')


#########################################
# And similarly with the superflux method

Example #38

0

Show file

def load_mel_spectrogram_db(path, config):
    """Load a mel spectrogram and return it in dB together with its filters.

    Side effect: ``config['ref_power']`` is set to the maximum of the loaded
    (pre-dB) spectrogram.

    :param path: passed through to ``load_mel_spectrogram``
    :param config: passed through to ``load_mel_spectrogram`` and mutated
    :return: tuple ``(spectrogram_in_db, mel_filters)``
    """
    spec, mel_filters = load_mel_spectrogram(path, config)
    # Record the peak value that the dB conversion below uses as reference.
    config['ref_power'] = np.max(spec)
    spec_db = lr.power_to_db(spec, ref=np.max)
    return spec_db, mel_filters

Example #39

0

Show file

File: melspect.py Project: OpenGenus/audio_feature

import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display

audio = 'arabic6'
y, sr = lr.load('./{}.wav'.format(audio))

# Mel power spectrogram with 128 bands and an 8 kHz upper frequency limit.
# (Two earlier melspectrogram computations whose results were discarded or
# immediately overwritten have been removed; only this one was ever plotted.)
S = lr.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)

# Plot the spectrogram in dB relative to its peak.
plt.figure(figsize=(10, 4))
lr.display.specshow(lr.power_to_db(S, ref=np.max),
                    y_axis='mel',
                    fmax=8000,
                    x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

Example #40

0

Show file

File: augmentation.py Project: xiabofei/python_details

 def build_mfcc_fingerprint(data, n_mels):
     """Return the dB-scaled, 40-band mel spectrogram of ``data``.

     The spectrogram is computed at ``SAMPLE_RATE`` and expressed in dB
     relative to its maximum.

     NOTE(review): ``n_mels`` is accepted but never used -- the band count is
     fixed at 40. Left as-is because honouring the argument could change the
     feature shape for existing callers; confirm the intended behaviour.
     """
     # The audio is passed as ``y=``: recent librosa releases accept it only
     # as a keyword argument, and older releases accept both forms.
     mel_power = librosa.feature.melspectrogram(y=data,
                                                sr=SAMPLE_RATE,
                                                n_mels=40)
     return librosa.power_to_db(mel_power, ref=np.max)

Example #41

0

Show file

def graph_audio(f, opt, y=None, sr=None, show=True, shape=None, dest=None, ext=None, verbose=True):
    '''
    This function generates various audio representation graphs for specified .wav files
    (or given audio time series and sampling rate values). It also accepts an optional parameter
    to save the generated graphs to categorized directories based on the corresponding emotion
    conveyed in the audio sample.

    Args:
        f (str): the absolute path to the input .wav file
        opt (str): the type of audio graph representation to be generated ("spect" => spectrogram,
                   "mp_spect" => mel-power spectrogram, "cqt" => constant-Q transform, "chrom" => chromagram,
                   "mfcc" => MFCC intensity values)
        y (np.ndarray): supplied audio time series; optional
        sr (int): supplied sampling rate of audio time series y; optional
        show (bool): specifies whether or not to show the resulting graph (default is True, which always
                     depicts the resulting graph)
        shape (tuple(int, int)): the dimensions (in inches) of the image to display
        dest (str): if a value is given, this will serve as the path of the root directory to write to (default
                    value is None, which does not save the resulting graph)
        ext (int): if supplied, adds "..._<ext>.png" to saved audio file
        verbose (bool): specifies whether or not to add axis labels, ticks, and colorbars to resulting plots
                        (default value is True, which adds the aforementioned details)

    Returns:
        None (function may display a graph and / or save resulting graph file to a specified directory)
    '''
    # Load from disk unless both the samples and the rate were supplied.
    # Identity checks are required here: ``None in [y, sr]`` compares with
    # ``==``, which raises ValueError when ``y`` is a multi-element array.
    if y is None or sr is None:
        y, sr = librosa.load(f)
    cmap = cm.get_cmap('viridis')

    def _render(data, detailed, plain, colorbar_format=None):
        # Draw ``data`` with specshow. In verbose mode it is drawn on the
        # current axes with the ``detailed`` keyword arguments plus a
        # colorbar; otherwise on a fresh borderless figure with ``plain``.
        if verbose:
            librosa.display.specshow(data, cmap=cmap, **detailed)
            if colorbar_format is None:
                plt.colorbar()
            else:
                plt.colorbar(format=colorbar_format)
        else:
            fig, ax = plt.subplots(1)
            fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
            ax.axis('off')
            librosa.display.specshow(data, cmap=cmap, **plain)
            plt.axis('off')

    # Spectrogram
    if opt == 'spect':
        log_spect = np.log(get_spectrogram(y))
        _render(log_spect,
                dict(sr=sr, x_axis='time', y_axis='linear'),
                dict(sr=sr),
                colorbar_format='%+2.0f dB')

    # Mel Power Spectrogram
    elif opt == 'mp_spect':
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        log_S = librosa.power_to_db(S, ref=np.max)
        _render(log_S,
                dict(sr=sr, x_axis='time', y_axis='mel'),
                dict(sr=sr),
                colorbar_format='%+2.0f dB')

    # Constant-Q Transform
    elif opt == 'cqt':
        C = librosa.cqt(y, sr=sr)
        _render(librosa.amplitude_to_db(C**2),
                dict(x_axis='time', y_axis='cqt_note'),
                {},
                colorbar_format='%+2.0f dB')

    # Chromagram
    elif opt == 'chrom':
        C = np.abs(librosa.cqt(y, sr=sr))
        chroma = librosa.feature.chroma_cqt(C=C, sr=sr)
        _render(chroma, dict(x_axis='time', y_axis='chroma'), {})

    # MFCC Intensity
    elif opt == 'mfcc':
        raw_mfcc = librosa.feature.mfcc(y=y, sr=sr)
        scaled = scale(raw_mfcc, axis=1)
        _render(scaled, dict(sr=sr, x_axis='time'), dict(sr=sr))

    # Resolve the figure now, while the one just drawn is still current.
    # Previously ``fig`` was unbound when saving in verbose mode without a
    # ``shape``, which raised NameError at ``fig.savefig``.
    fig = plt.gcf() if (shape or dest) else None

    if shape:
        fig.set_size_inches(*shape)

    if show:
        plt.show()

    if dest:
        basename = os.path.basename(f)
        if shape:
            fig.set_size_inches(*shape)
        ext = '_{0:02d}'.format(ext) if ext else ''
        # NOTE(review): ``frameon`` is believed to be deprecated/removed as a
        # savefig argument in newer Matplotlib releases -- confirm against the
        # version in use.
        fig.savefig(dest + get_category(basename) + '/' + basename[:-4] + ext + '.png',
         dpi=256, frameon=False)
        plt.close()

Example #42

0

Show file

def aug_get_spectrogram_feature(filepath):
    """Load an audio file and return its augmented, normalized log-mel spectrogram.

    The file is loaded at 16 kHz and converted to a 128-band mel spectrogram
    (``n_fft=512``, hop of 128 samples). The spectrogram is given a leading
    axis, passed through ``spec_augment``, converted to dB relative to its
    maximum, passed through ``_normalize`` and finally returned as a
    ``torch.FloatTensor`` with its two axes swapped.

    :param filepath: path of the audio file to load
    :return: the transposed feature tensor
    """
    sample_rate = 16000
    hop_length = 128

    # ``sr`` is passed by keyword: recent librosa releases accept it only as
    # a keyword argument, and older releases accept both forms.
    sig, sample_rate = librosa.core.load(filepath, sr=sample_rate)

    mel_spectrogram = librosa.feature.melspectrogram(y=sig,
                                                     n_mels=128,
                                                     sr=sample_rate,
                                                     n_fft=512,
                                                     hop_length=hop_length)

    # spec_augment is fed a tensor with one extra leading axis; the first
    # entry along that axis is taken back out afterwards.
    shape = mel_spectrogram.shape
    mel_spectrogram = np.reshape(mel_spectrogram, (-1, shape[0], shape[1]))
    mel_spectrogram = torch.from_numpy(mel_spectrogram)
    mel_spectrogram = spec_augment(mel_spectrogram)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max)
    mel_spectrogram = _normalize(mel_spectrogram)

    mel_spectrogram = torch.FloatTensor(mel_spectrogram).transpose(0, 1)

    return mel_spectrogram

Example #43

0

Show file

File: fe_and_augmentation.py Project: xiabofei/python_details

 def calculates_log_mel(data):
     """Return the 128-band mel spectrogram of ``data`` in dB relative to its peak.

     The spectrogram is computed at ``SAMPLE_RATE``.
     """
     # The audio is passed as ``y=``: recent librosa releases accept it only
     # as a keyword argument, and older releases accept both forms.
     mel_power = librosa.feature.melspectrogram(y=data,
                                                sr=SAMPLE_RATE,
                                                n_mels=128)
     return librosa.power_to_db(mel_power, ref=np.max)

Example #44

0

Show file

File: CreateDataSet.py Project: eyalbd2/Kaglle-Tensorflow-Speech-Recognition

        sample_rate, samples = wavfile.read(trainset[index][2])
        # Cut 1500 random excerpts from the loaded recording, scale each by a
        # random factor, and store the second-order delta of its MFCCs in
        # either the training or the validation set.
        for idx in range(1500):
            print('Creating Silence')
            # ``np.int`` was only an alias of the builtin ``int`` and has been
            # removed from NumPy; the single random draw is taken out of the
            # length-1 array explicitly before conversion.
            start_point = int(900000 * np.random.rand(1)[0])
            end_point = start_point + 16000
            cur_samples = samples[start_point:end_point - 1]
            power_factor = 0.5 + np.random.rand(1)
            cur_samples = cur_samples * power_factor
            # The audio is passed as ``y=``: recent librosa releases accept it
            # only as a keyword argument, and older releases accept both.
            S = librosa.feature.melspectrogram(y=cur_samples.astype(float),
                                               sr=sample_rate,
                                               n_mels=64,
                                               hop_length=250,
                                               n_fft=480,
                                               fmin=20,
                                               fmax=4000)
            log_S = librosa.power_to_db(S, ref=np.max)
            mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=64)
            delta2_mfcc = librosa.feature.delta(mfcc, order=2)
            # choose if 'silence' is going to train or validation
            if np.random.binomial(1, 1400 / 1500):
                # trainSetMEL_DB.append(log_S)
                trainSetMFCC.append(delta2_mfcc)
                trainSetClasses.append(trainset[index][0])
            else:
                # valSetMEL_DB.append(log_S)
                valSetMFCC.append(delta2_mfcc)
                valSetClasses.append(10)

    else:
        if (trainset[index][0] == 11) & (np.random.binomial(1, 0.80)):
            continue

Example #45

0

Show file

File: test_core.py Project: dpwe/librosa

    def __test(ref):
        """Round-trip ``xp`` through dB and back with reference ``ref``.

        The value recovered by ``db_to_power`` must be close to the original
        ``xp`` taken from the enclosing scope.
        """
        round_tripped = librosa.db_to_power(
            librosa.power_to_db(xp, ref=ref, top_db=None),
            ref=ref)
        assert np.allclose(xp, round_tripped)

Example #46

0

Show file

def compute_MFCC(y, parameter):
    """Compute MFCCs of ``y`` from its mel spectrum.

    The mel spectrum from ``compute_Mel_Spectrum`` is converted to dB with a
    reference of 1.0 and reduced to ``parameter.mfccs`` coefficients.
    """
    mel_db = librosa.power_to_db(compute_Mel_Spectrum(y, parameter), ref=1.0)
    return librosa.feature.mfcc(S=mel_db, n_mfcc=parameter.mfccs)

Example #47

0

Show file

File: plot_display.py Project: wgfi110/librosa

# A full list of the supported parameters is provided in the
# `librosa.display.specshow` documentation.

# %%
# Other types of spectral data
# ----------------------------
# The examples above illustrate how to plot linear spectrograms,
# but librosa provides many kinds of spectral representations:
# Mel-scaled, constant-Q, variable-Q, chromagrams, tempograms, etc.
#
# specshow can plot these just as well.  For example, a Mel spectrogram
# can be displayed as follows:

fig, ax = plt.subplots()
M = librosa.feature.melspectrogram(y=y, sr=sr)
M_db = librosa.power_to_db(M, ref=np.max)
img = librosa.display.specshow(M_db, y_axis='mel', x_axis='time', ax=ax)
ax.set(title='Mel spectrogram display')
fig.colorbar(img, ax=ax, format="%+2.f dB")

# %%
# Constant-Q plots, and other logarithmically scaled frequency representations
# such as Variable-Q or `iirt` can be decorated using either the frequencies (Hz)
# or their note names in scientific pitch notation:

C = librosa.cqt(y=y, sr=sr)
C_db = librosa.amplitude_to_db(np.abs(C), ref=np.max)

fig, ax = plt.subplots()
librosa.display.specshow(C_db, y_axis='cqt_hz', x_axis='time', ax=ax)
ax.set(title='Frequency (Hz) axis decoration')

Example #48

0

Show file

def compute_Normalized_Log_Spectrogram(y, parameter):
    """Return the power spectrogram of ``y`` in dB, referenced to ``parameter.n_fft``.

    The spectrogram itself comes from ``compute_Power_Spectrogram``.
    """
    power_spectrogram = compute_Power_Spectrogram(y, parameter)
    return librosa.power_to_db(power_spectrogram, ref=parameter.n_fft)