Example #1
0
    def prepare_spectrogram_plot(
        self,
        type: SpectrogramType = SpectrogramType.power_level,
        frequency_scale: SpectrogramFrequencyScale = SpectrogramFrequencyScale.
        linear
    ) -> None:
        """Draw the example's spectrogram onto a new matplotlib figure.

        The figure is built through the ``plt`` state machine: title, axis
        labels, the image itself, a colorbar and unit-aware tick formatters.
        Nothing is shown or saved here; the method returns ``None``.

        Args:
            type: Which spectrogram variant to request from ``self.example``;
                its ``value`` is used in the title and the colorbar label.
            frequency_scale: Frequency axis scale. When it equals
                ``SpectrogramFrequencyScale.mel`` the vertical extent and the
                y tick labels are expressed in mel.
        """
        spectrogram = self.example.spectrogram(
            type, frequency_scale=frequency_scale)

        figure, axes = plt.subplots(1, 1)
        use_mel = frequency_scale == SpectrogramFrequencyScale.mel

        # Title, wrapped at 100 characters per line.
        scale_prefix = "mel " if use_mel else ""
        heading = "{0}{1} spectrogram for {2}".format(
            scale_prefix, type.value, str(self))
        plt.title("\n".join(wrap(heading, width=100)))

        # Axis labels.
        step_in_ms = round(1000 / self.example.time_step_rate())
        plt.xlabel("time (data every {}ms)".format(step_in_ms))

        frequency_count = self.example.frequency_count_from_spectrogram(
            spectrogram)
        plt.ylabel(
            "frequency (data evenly distributed on {} scale, {} total)".format(
                frequency_scale.value, frequency_count))

        # Vertical extent: mel values of the first/last mel frequency when on
        # the mel scale, otherwise 0 up to the highest detectable frequency.
        mel_frequencies = self.example.mel_frequencies()
        duration = self.example.duration_in_s
        if use_mel:
            lowest = librosa.hz_to_mel(mel_frequencies[0])[0]
            highest = librosa.hz_to_mel(mel_frequencies[-1])[0]
        else:
            lowest = 0
            highest = self.example.highest_detectable_frequency()

        plt.imshow(
            spectrogram,
            cmap='gist_heat',
            origin='lower',
            aspect='auto',
            extent=[0, duration, lowest, highest])

        # Colorbar label describes how the plotted values should be read.
        if type == SpectrogramType.power_level:
            value_note = (
                "in{} dB, not aligned to a particular base level".format(
                    " something similar to" if use_mel else ""))
        else:
            value_note = "only proportional to physical scale"
        plt.colorbar(label="{} ({})".format(type.value, value_note))

        class ScalarFormatterWithUnit(ScalarFormatter):
            """ScalarFormatter that appends a fixed unit to each tick label."""

            def __init__(self, unit: str):
                super().__init__()
                self.unit = unit

            def __call__(self, x, pos=None) -> str:
                plain_label = super().__call__(x, pos)
                return plain_label + self.unit

        def mel_tick_label(value, pos):
            """Label a mel tick with both its mel value and the Hz equivalent."""
            return "{}mel = {}Hz".format(
                int(value), int(librosa.mel_to_hz(value)[0]))

        axes.xaxis.set_major_formatter(ScalarFormatterWithUnit("s"))
        if use_mel:
            y_formatter = FuncFormatter(mel_tick_label)
        else:
            y_formatter = ScalarFormatterWithUnit("Hz")
        axes.yaxis.set_major_formatter(y_formatter)

        figure.set_size_inches(19.20, 10.80)
    def retrieve_components(self, selection_order=None):
        """Return a copy of the spectrogram in which only selected segments are kept.

        The spectrogram is divided into ``self.temporal_segments`` blocks along
        axis 1 and ``self.frequency_segments`` blocks along axis 0. Every cell
        outside the selected segments is set to the spectrogram's minimum value.

        Args:
            selection_order: Iterable of integer segment indices. Index ``so``
                addresses temporal segment ``so // self.frequency_segments`` and
                frequency segment ``so % self.frequency_segments`` (the order
                of segments in [Mishra 2017] Figure 4). ``None`` returns
                ``self.spectrogram`` itself, unmodified.

        Returns:
            ``self.spectrogram`` when ``selection_order`` is ``None``; otherwise
            a new array of the same shape containing the selected segments.

        Warns:
            UserWarning: when the number of frames is not divisible by
                ``self.temporal_segments``; the remaining frames are attached
                to the last temporal segment.
        """
        if selection_order is None:
            return self.spectrogram

        # Canvas filled with the minimum value; selected segments are copied in.
        S = np.full_like(self.spectrogram, self.spectrogram.min())
        n_freq_bins = S.shape[0]
        n_frames = S.shape[1]

        # following the order of segments in [Mishra 2017] Figure 4
        temp_length = n_frames // self.temporal_segments
        freq_length = n_freq_bins // self.frequency_segments

        left_over = n_frames - temp_length * self.temporal_segments
        if left_over > 0:
            warnings.warn("Adding last {} frames to last segment".format(left_over))

        if self.mel_scale:
            # Segment boundaries equally spaced on the mel scale, converted back
            # to Hz and then mapped proportionally onto the row indices.
            f_max = self.sr // 2
            mel_max = librosa.hz_to_mel(f_max)
            hz_steps = librosa.mel_to_hz(list(range(0,
                                                    int(np.ceil(mel_max)),
                                                    int(mel_max // self.frequency_segments))))
            hz_steps[-1:] = f_max

            def compute_f_start(f):
                # Use the actual number of rows instead of a hard-coded 1025.
                return int(hz_steps[f] / f_max * n_freq_bins)

            def compute_f_end(f):
                return int(hz_steps[f + 1] / f_max * n_freq_bins)
        else:
            def compute_f_start(f):
                return f * freq_length

            def compute_f_end(f):
                return compute_f_start(f) + freq_length

        # Temporal indices run from 0 to temporal_segments - 1, so the last
        # segment (the one that absorbs the left-over frames) is this index.
        last_temporal_segment = self.temporal_segments - 1

        for so in selection_order:
            t, f = divmod(so, self.frequency_segments)

            t_start = t * temp_length
            if t == last_temporal_segment:
                t_end = n_frames
            else:
                t_end = t_start + temp_length
            f_start = compute_f_start(f)
            f_end = compute_f_end(f)

            S[f_start:f_end, t_start:t_end] = self.spectrogram[f_start:f_end, t_start:t_end]

        return S
Example #3
0
    def __test_to_mel(infile):
        """Check ``librosa.hz_to_mel`` against the reference data in *infile*.

        The loaded fixture supplies the input frequencies (``'f'``), the
        ``htk`` flag (``'htk'``) and the expected output (``'result'``).
        """
        DATA = load(infile)
        # Pass ``htk`` by keyword: it is keyword-only in recent librosa
        # releases, and the keyword form is accepted by older ones as well.
        z = librosa.hz_to_mel(DATA['f'], htk=DATA['htk'])

        assert np.allclose(z, DATA['result'])
Example #4
0
def test_hz_to_mel(infile):
    """Compare ``librosa.hz_to_mel`` with the reference values stored in *infile*.

    The fixture provides the input frequencies under ``"f"``, the ``htk`` flag
    under ``"htk"`` and the expected conversion under ``"result"``.
    """
    fixture = load(infile)
    converted = librosa.hz_to_mel(fixture["f"], htk=fixture["htk"])
    expected = fixture["result"]

    assert np.allclose(converted, expected)
 def hz_to_mel(y, *args):
     """Forward *y* and any extra positional arguments to ``librosa.hz_to_mel``."""
     converted = librosa.hz_to_mel(y, *args)
     return converted
Example #6
0
def get_audio(track_id):
    """Build the MP3 path for *track_id* below the module-level ``data_path``.

    The id is zero-padded to at least six digits; the first three characters
    of the padded id name the sub-directory and the full padded id plus
    ``.mp3`` names the file.
    """
    padded_id = format(track_id, '06d')
    folder = padded_id[:3]
    filename = padded_id + '.mp3'
    return os.path.join(data_path, folder, filename)

# Select the entries of `tracks` picked out by `jazz_mask`, map each index value
# through get_audio() to obtain its audio path, and collect the paths in a list.
# The list is not bound to a name here (presumably displayed as notebook-cell
# output; `tracks` is presumably a pandas object -- confirm).
tracks[jazz_mask].index.map(get_audio).tolist()


# %%
import torch
import librosa
import numpy as np

# Round-trip experiment: load audio, take its STFT, pack log-magnitude and
# phase into a single torch tensor, unpack them again and invert the STFT.
tsr = 13000  # sample rate requested from librosa.load
# NOTE(review): librosa.util.example_audio_file() is deprecated/removed in newer
# librosa releases (replaced by librosa.example / librosa.ex) -- confirm against
# the installed version.
y, sr = librosa.load(librosa.util.example_audio_file(), sr=tsr)
# NOTE(review): hz_to_mel is a frequency-scale conversion but is applied to the
# waveform samples here (and undone with mel_to_hz below) -- confirm intent.
y = librosa.hz_to_mel(y)
D = librosa.stft(y, n_fft=1024)
print(D.shape)
lmag = np.log(np.abs(D) + 1)  # log(1 + |D|); inverted below with exp(.) - 1
agl = np.angle(D) # / np.pi
lmag, agl = torch.from_numpy(lmag), torch.from_numpy(agl)
# Stack log-magnitude (index 0) and phase (index 1) along a new leading axis.
tensor = torch.stack((lmag, agl), 0)
tensor = tensor.squeeze()
mag = tensor[0, :, :].numpy()
agl = tensor[1, :, :].numpy()
mag = np.exp(mag) - 1
# Rebuild the complex STFT from magnitude and phase. The imaginary unit is
# written as the literal 1j because the np.complex alias was removed from NumPy.
stft = mag * np.cos(agl) + (mag * np.sin(agl) * 1j)
y_hat = librosa.istft(stft)
y = librosa.mel_to_hz(y)
y_hat = librosa.mel_to_hz(y_hat)
# y = librosa.resample(y, sr, tsr)
Example #7
0
 def __call__(self, spect, sample_rate):
     """Zero the leading entries of *spect* up to the cut-off ``self.freq``.

     The number of entries to clear is ``len(spect)`` scaled by the ratio of
     the cut-off frequency to half the sample rate, both converted to mel.
     *spect* is modified in place and returned together with *sample_rate*.
     """
     mel_cut = librosa.hz_to_mel(self.freq)
     mel_max = librosa.hz_to_mel(sample_rate / 2)
     n_freq = int(len(spect) * mel_cut / mel_max)
     spect[:n_freq] = 0
     return spect, sample_rate
Example #8
0
 def transform_non_affine(self, a):
     """Scale *a* by 1000.0 and convert the result with ``librosa.hz_to_mel``.

     NOTE(review): the factor suggests *a* is given in kHz -- confirm with callers.
     """
     scaled = a * 1000.0
     return librosa.hz_to_mel(scaled)
Example #9
0
def test_hz2mel():
    """Check that ``hz2mel`` agrees with ``librosa.hz_to_mel(..., htk=True)``.

    Ten random values drawn with ``np.random.random`` serve as input.
    """
    frequencies = np.random.random(10)
    actual = hz2mel(frequencies)
    reference = librosa.hz_to_mel(frequencies, htk=True)
    assert np.allclose(actual, reference)