Beispiel #1
0
def test_mel_to_hz(infile):
    """Compare ``librosa.mel_to_hz`` with the reference data stored in ``infile``.

    The loaded fixture is read through the keys ``"f"`` (input values),
    ``"htk"`` (forwarded as the ``htk`` keyword) and ``"result"`` (expected
    output).
    """
    fixture = load(infile)
    mels = fixture["f"]
    use_htk = fixture["htk"]

    # Conversion of the complete input.
    converted = librosa.mel_to_hz(mels, htk=use_htk)
    assert np.allclose(converted, fixture["result"])

    # Conversion of a single element must match the first reference entry.
    first_converted = librosa.mel_to_hz(mels[0], htk=use_htk)
    assert np.allclose(first_converted, fixture["result"][0])
Beispiel #2
0
    def prepare_spectrogram_plot(
            self,
            type: SpectrogramType = SpectrogramType.power_level,
            frequency_scale: SpectrogramFrequencyScale = SpectrogramFrequencyScale.linear
    ) -> None:
        """Set up a matplotlib figure showing a spectrogram of ``self.example``.

        The spectrogram is obtained from ``self.example.spectrogram`` and drawn
        with ``plt.imshow``; title, axis labels, a colorbar and tick formatters
        are configured, and the figure size is set to 19.20 x 10.80 inches.
        Nothing is shown or saved here.

        :param type: spectrogram type passed to ``self.example.spectrogram``;
            its ``value`` is used in the title and the colorbar label.
        :param frequency_scale: frequency scale of the spectrogram; when it
            equals ``SpectrogramFrequencyScale.mel`` the y axis is labelled in
            mel with the corresponding Hz value.
        """
        spectrogram = self.example.spectrogram(type,
                                               frequency_scale=frequency_scale)

        figure, axes = plt.subplots(1, 1)
        use_mel = frequency_scale == SpectrogramFrequencyScale.mel

        # Title, wrapped to at most 100 characters per line.
        scale_prefix = "mel " if use_mel else ""
        heading = "{0}{1} spectrogram for {2}".format(
            scale_prefix, type.value, str(self))
        plt.title("\n".join(wrap(heading, width=100)))

        step_in_ms = round(1000 / self.example.time_step_rate())
        plt.xlabel("time (data every {}ms)".format(step_in_ms))

        frequency_count = self.example.frequency_count_from_spectrogram(
            spectrogram)
        plt.ylabel(
            "frequency (data evenly distributed on {} scale, {} total)".format(
                frequency_scale.value, frequency_count))

        # Vertical extent: mel values of the first/last mel frequency on the
        # mel scale, otherwise 0 up to the highest detectable frequency.
        mel_frequencies = self.example.mel_frequencies()
        duration = self.example.duration_in_s
        if use_mel:
            lowest = librosa.hz_to_mel(mel_frequencies[0])[0]
            highest = librosa.hz_to_mel(mel_frequencies[-1])[0]
        else:
            lowest = 0
            highest = self.example.highest_detectable_frequency()

        plt.imshow(
            spectrogram,
            cmap='gist_heat',
            origin='lower',
            aspect='auto',
            extent=[0, duration, lowest, highest])

        if type == SpectrogramType.power_level:
            unit_note = "in{} dB, not aligned to a particular base level".format(
                " something similar to" if use_mel else "")
        else:
            unit_note = "only proportional to physical scale"
        plt.colorbar(label="{} ({})".format(type.value, unit_note))

        class ScalarFormatterWithUnit(ScalarFormatter):
            """ScalarFormatter that appends a unit string to every tick label."""

            def __init__(self, unit: str):
                super().__init__()
                self.unit = unit

            def __call__(self, x, pos=None) -> str:
                label = super().__call__(x, pos)
                return label + self.unit

        def mel_tick_label(value, pos):
            # Show the tick both in mel and in the converted Hz value.
            return "{}mel = {}Hz".format(
                int(value), int(librosa.mel_to_hz(value)[0]))

        axes.xaxis.set_major_formatter(ScalarFormatterWithUnit("s"))
        if use_mel:
            y_formatter = FuncFormatter(mel_tick_label)
        else:
            y_formatter = ScalarFormatterWithUnit("Hz")
        axes.yaxis.set_major_formatter(y_formatter)
        figure.set_size_inches(19.20, 10.80)
    def retrieve_components(self, selection_order=None):
        """Return a copy of the spectrogram in which only selected segments are kept.

        The spectrogram is divided into ``self.temporal_segments`` segments
        along axis 1 and ``self.frequency_segments`` segments along axis 0.
        A segment index ``so`` addresses temporal segment
        ``so // self.frequency_segments`` and frequency segment
        ``so % self.frequency_segments`` (segment order as in [Mishra 2017]
        Figure 4). All cells outside the selected segments are set to the
        minimum value of the spectrogram.

        Parameters
        ----------
        selection_order : iterable of int or None
            Indices of the segments to keep. If None, ``self.spectrogram``
            itself is returned unchanged.

        Returns
        -------
        Array shaped like ``self.spectrogram``.

        Notes
        -----
        When axis 1 is not divisible by ``self.temporal_segments`` a warning
        is issued and the remaining frames are attached to the last temporal
        segment.
        """
        if selection_order is None:
            return self.spectrogram

        S = np.zeros_like(self.spectrogram) + self.spectrogram.min()
        n_bins = S.shape[0]

        temp_length = S.shape[1] // self.temporal_segments
        freq_length = n_bins // self.frequency_segments

        left_over = S.shape[1] - temp_length * self.temporal_segments
        if left_over > 0:
            warnings.warn("Adding last {} frames to last segment".format(left_over))

        if self.mel_scale:
            # Segment boundaries are equally spaced on the mel scale and then
            # mapped back to row indices of the spectrogram.
            f_max = self.sr // 2
            mel_max = librosa.hz_to_mel(f_max)
            hz_steps = librosa.mel_to_hz(list(range(0,
                                                    int(np.ceil(mel_max)),
                                                    int(mel_max // self.frequency_segments))))
            hz_steps[-1:] = f_max

            # The row count of the spectrogram replaces the previously
            # hard-coded 1025; results are identical for 1025-row inputs.
            def compute_f_start(f):
                return int(hz_steps[f] / f_max * n_bins)

            def compute_f_end(f):
                return int(hz_steps[f + 1] / f_max * n_bins)
        else:
            def compute_f_start(f):
                return f * freq_length

            def compute_f_end(f):
                return compute_f_start(f) + freq_length

        last_temporal_segment = self.temporal_segments - 1
        for so in selection_order:
            t = so // self.frequency_segments
            f = so % self.frequency_segments

            t_start = t * temp_length
            # Temporal indices run from 0 to temporal_segments - 1, so the
            # last segment is the one that has to absorb the left-over frames.
            # (The previous check against temporal_segments never matched a
            # valid index.) ">=" keeps the old behavior for larger indices.
            if t >= last_temporal_segment:
                t_end = S.shape[1]
            else:
                t_end = t_start + temp_length
            f_start = compute_f_start(f)
            f_end = compute_f_end(f)

            S[f_start:f_end, t_start:t_end] = self.spectrogram[f_start:f_end, t_start:t_end]

        return S
Beispiel #4
0
    def __test_to_hz(infile):
        """Check ``librosa.mel_to_hz`` against the reference data in ``infile``.

        The loaded fixture is read through the keys ``'f'``, ``'htk'`` and
        ``'result'``.
        """
        DATA = load(infile)
        # htk is passed by keyword: recent librosa releases declare it
        # keyword-only, and the keyword form works on older releases too.
        z = librosa.mel_to_hz(DATA['f'], htk=DATA['htk'])

        assert np.allclose(z, DATA['result'])
def feature_extraction(y, fs=44100, statistics=True, include_mfcc0=True, include_delta=True,
                       include_acceleration=True, mfcc_params=None, delta_params=None, acceleration_params=None):
    """Feature extraction, MFCC based features

    Outputs features in dict, format:

        {
            'feat': feature_matrix [shape=(frame count, feature vector size)],
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }

    The shape of the feature matrix is also printed to stdout.

    Parameters
    ----------
    y: numpy.array [shape=(signal_length, )]
        Audio

    fs: int > 0 [scalar]
        Sample rate
        (Default value=44100)

    statistics: bool
        Calculate feature statistics for extracted matrix
        (Default value=True)

    include_mfcc0: bool
        Keep the first row of the stacked feature matrix (the 0th static
        coefficient). When False that row is dropped.
        (Default value=True)

    include_delta: bool
        Include delta MFCC coefficients.
        (Default value=True)

    include_acceleration: bool
        Include acceleration MFCC coefficients.
        (Default value=True)

    mfcc_params: dict
        Parameters for extraction of static MFCC coefficients. Required
        despite the None default; the keys 'window', 'n_fft', 'win_length',
        'hop_length', 'n_mels', 'fmin', 'fmax' and 'htk' are read.

    delta_params: dict or None
        Extra keyword arguments for librosa.feature.delta when computing the
        delta coefficients. None means no extra arguments.

    acceleration_params: dict or None
        Extra keyword arguments for librosa.feature.delta (order=2) when
        computing the acceleration coefficients. None means no extra
        arguments.

    Returns
    -------
    result: dict
        Feature dict

    Raises
    ------
    TypeError
        If mfcc_params is None.

    """

    if mfcc_params is None:
        # Fail early with a readable message instead of an opaque
        # "'NoneType' object is not subscriptable" further down.
        raise TypeError("mfcc_params must be a dict of MFCC extraction parameters, not None")

    # The None defaults must not be **-unpacked directly.
    delta_kwargs = delta_params or {}
    acceleration_kwargs = acceleration_params or {}

    eps = numpy.spacing(1)

    # Windowing function
    window_name = mfcc_params['window']
    if window_name == 'hamming_asymmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=False)
    elif window_name == 'hamming_symmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=True)
    elif window_name == 'hann_asymmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=False)
    elif window_name == 'hann_symmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=True)
    else:
        # Any other name leaves the window choice to librosa.stft.
        window = None

    # Calculate Static Coefficients
    magnitude_spectrogram = numpy.abs(librosa.stft(y + eps,
                                                   n_fft=mfcc_params['n_fft'],
                                                   win_length=mfcc_params['win_length'],
                                                   hop_length=mfcc_params['hop_length'],
                                                   center=True,
                                                   window=window))**2

    mel_basis = librosa.filters.mel(sr=fs,
                                    n_fft=mfcc_params['n_fft'],
                                    n_mels=mfcc_params['n_mels'],
                                    fmin=mfcc_params['fmin'],
                                    fmax=mfcc_params['fmax'],
                                    htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, magnitude_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.logamplitude(mel_spectrum))

    # NOTE(review): the MFCC matrix is passed through mel_to_hz before any
    # further processing. Kept as in the original; confirm this is intended.
    mfcc = librosa.mel_to_hz(mfcc)

    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients
        mfcc_delta = librosa.feature.delta(mfcc, **delta_kwargs)

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))

    if include_acceleration:
        # Acceleration coefficients (aka delta-delta)
        mfcc_delta2 = librosa.feature.delta(mfcc, order=2, **acceleration_kwargs)

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))

    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]

    # Frames become rows, features become columns.
    feature_matrix = feature_matrix.T
    # Call form of print: valid on both Python 2 and Python 3.
    print(feature_matrix.shape)

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
Beispiel #6
0
# Round-trip experiment: load audio, take the STFT, pack log-magnitude and
# phase into a torch tensor, unpack again, invert the STFT and write both the
# original and the reconstructed signal to disk.
tsr = 13000
y, sr = librosa.load(librosa.util.example_audio_file(), sr=tsr)
# NOTE(review): hz_to_mel is applied to the waveform samples here and undone
# with mel_to_hz before writing. Kept as in the original; confirm intent.
y = librosa.hz_to_mel(y)
D = librosa.stft(y, n_fft=1024)
print(D.shape)
lmag = np.log(np.abs(D) + 1)
agl = np.angle(D) # / np.pi
lmag, agl = torch.from_numpy(lmag), torch.from_numpy(agl)
# Channel 0 holds the log-magnitude, channel 1 the phase angle.
tensor = torch.stack((lmag, agl), 0)
tensor = tensor.squeeze()
mag = tensor[0, :, :].numpy()
agl = tensor[1, :, :].numpy()
# Undo the log(|D| + 1) compression.
mag = np.exp(mag) - 1
# Rebuild the complex STFT from magnitude and phase. The literal 1j replaces
# np.complex(0, 1), an alias that no longer exists in NumPy >= 1.24.
stft = mag * np.cos(agl) + (mag * np.sin(agl) * 1j)
y_hat = librosa.istft(stft)
y = librosa.mel_to_hz(y)
y_hat = librosa.mel_to_hz(y_hat)
# y = librosa.resample(y, sr, tsr)
# y_hat = librosa.resample(y, sr, tsr)
librosa.output.write_wav('datasets/librosa_orig.wav', y, sr)
librosa.output.write_wav('datasets/librosa_stft.wav', y_hat, sr)

# %%
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
Beispiel #7
0
def mel_to_hz(y, **kwargs):
    """Apply ``librosa.mel_to_hz`` to each ``y[i, :]`` and stack the results.

    Any keyword arguments are forwarded unchanged to ``librosa.mel_to_hz``.
    The per-row results are combined with ``np.stack``.
    """
    converted_rows = []
    for row_index in range(y.shape[0]):
        converted_rows.append(librosa.mel_to_hz(y[row_index, :], **kwargs))
    return np.stack(converted_rows)
Beispiel #8
0
 def transform_non_affine(self, a):
     """Convert ``a`` with ``librosa.mel_to_hz`` and divide the result by 1000.0."""
     hertz = librosa.mel_to_hz(a)
     return hertz / 1000.0
Beispiel #9
0
def test_mel2hz():
    """Check that ``mel2hz`` agrees with ``librosa.mel_to_hz(..., htk=True)``.

    The comparison uses 10 random values drawn with ``np.random.random``.
    """
    mels = np.random.random(10)
    actual = mel2hz(mels)
    expected = librosa.mel_to_hz(mels, htk=True)
    assert np.allclose(actual, expected)