def prepare_spectrogram_plot(
        self,
        type: SpectrogramType = SpectrogramType.power_level,
        frequency_scale: SpectrogramFrequencyScale = SpectrogramFrequencyScale.linear
) -> None:
    """Draw the example's spectrogram on a fresh matplotlib figure.

    The spectrogram is obtained from ``self.example`` for the requested
    ``type`` and ``frequency_scale`` and rendered with ``plt.imshow``.
    The plot gets a wrapped title, axis labels, a colorbar and tick
    formatters that append units; the figure is sized to 19.20 x 10.80
    inches.  Nothing is shown or saved here.

    :param type: which spectrogram variant to request from the example.
    :param frequency_scale: linear or mel frequency axis.
    """
    spectrogram = self.example.spectrogram(type, frequency_scale=frequency_scale)
    figure, axes = plt.subplots(1, 1)
    use_mel = frequency_scale == SpectrogramFrequencyScale.mel

    # Title, wrapped so that long example names do not overflow the figure.
    scale_prefix = "mel " if use_mel else ""
    title_text = "{0}{1} spectrogram for {2}".format(
        scale_prefix, type.value, str(self))
    plt.title("\n".join(wrap(title_text, width=100)))

    # Axis labels.
    step_in_ms = round(1000 / self.example.time_step_rate())
    plt.xlabel("time (data every {}ms)".format(step_in_ms))

    y_label = "frequency (data evenly distributed on {} scale, {} total)".format(
        frequency_scale.value,
        self.example.frequency_count_from_spectrogram(spectrogram))
    plt.ylabel(y_label)

    # Image extent: time on x; on y either mel values or plain frequencies.
    mel_frequencies = self.example.mel_frequencies()
    duration = self.example.duration_in_s
    if use_mel:
        lowest = librosa.hz_to_mel(mel_frequencies[0])[0]
        highest = librosa.hz_to_mel(mel_frequencies[-1])[0]
    else:
        lowest = 0
        highest = self.example.highest_detectable_frequency()

    plt.imshow(
        spectrogram,
        cmap='gist_heat',
        origin='lower',
        aspect='auto',
        extent=[0, duration, lowest, highest])

    # Colorbar label explains the unit of the plotted values.
    type_name = type.value
    if type == SpectrogramType.power_level:
        unit_note = "in{} dB, not aligned to a particular base level".format(
            " something similar to" if use_mel else "")
    else:
        unit_note = "only proportional to physical scale"
    plt.colorbar(label="{} ({})".format(type_name, unit_note))

    class ScalarFormatterWithUnit(ScalarFormatter):
        """ScalarFormatter whose tick labels carry a trailing unit string."""

        def __init__(self, unit: str):
            super().__init__()
            self.unit = unit

        def __call__(self, x, pos=None) -> str:
            return super().__call__(x, pos) + self.unit

    def mel_tick_label(value, pos):
        # Show each mel tick together with its frequency equivalent.
        return "{}mel = {}Hz".format(
            int(value), int(librosa.mel_to_hz(value)[0]))

    axes.xaxis.set_major_formatter(ScalarFormatterWithUnit("s"))

    if use_mel:
        y_formatter = FuncFormatter(mel_tick_label)
    else:
        y_formatter = ScalarFormatterWithUnit("Hz")
    axes.yaxis.set_major_formatter(y_formatter)

    figure.set_size_inches(19.20, 10.80)
def retrieve_components(self, selection_order=None):
    """Return the spectrogram with only the selected segments kept.

    The spectrogram is cut into ``self.temporal_segments`` pieces along
    axis 1 and ``self.frequency_segments`` pieces along axis 0.  A segment
    index ``so`` addresses temporal segment ``so // self.frequency_segments``
    and frequency segment ``so % self.frequency_segments``.

    :param selection_order: iterable of segment indices to keep, or
        ``None`` to get ``self.spectrogram`` itself back unchanged.
    :return: ``self.spectrogram`` when ``selection_order`` is ``None``;
        otherwise a new array of the same shape, filled with the
        spectrogram's minimum value everywhere except in the selected
        segments, which are copied from the spectrogram.

    When the number of frames is not divisible by the number of temporal
    segments a warning is issued and the remaining frames are attached to
    the last temporal segment.
    """
    if selection_order is None:
        return self.spectrogram

    # Background canvas: every cell holds the smallest spectrogram value.
    S = np.zeros_like(self.spectrogram) + self.spectrogram.min()
    n_freq_bins, n_frames = S.shape[0], S.shape[1]

    # following the order of segments in [Mishra 2017] Figure 4
    temp_length = n_frames // self.temporal_segments
    freq_length = n_freq_bins // self.frequency_segments

    left_over = n_frames - temp_length * self.temporal_segments
    if left_over > 0:
        warnings.warn("Adding last {} frames to last segment".format(left_over))

    if self.mel_scale:
        # Segment borders are spaced on the mel axis and then mapped back
        # to row indices of the (linear-frequency) spectrogram.
        f_max = self.sr // 2
        mel_max = librosa.hz_to_mel(f_max)
        hz_steps = librosa.mel_to_hz(
            list(range(0,
                       int(np.ceil(mel_max)),
                       int(mel_max // self.frequency_segments))))
        hz_steps[-1:] = f_max  # the top border is always the maximum frequency

        # Use the real number of frequency bins instead of a fixed 1025 so
        # that spectrograms of any FFT size are segmented correctly.
        def compute_f_start(f):
            return int(hz_steps[f] / f_max * n_freq_bins)

        def compute_f_end(f):
            return int(hz_steps[f + 1] / f_max * n_freq_bins)
    else:
        def compute_f_start(f):
            return f * freq_length

        def compute_f_end(f):
            return compute_f_start(f) + freq_length

    last_temporal_segment = self.temporal_segments - 1
    for so in selection_order:
        t = so // self.frequency_segments
        f = so % self.frequency_segments

        t_start = t * temp_length
        if t == last_temporal_segment:
            # The last segment also absorbs the left-over frames announced
            # in the warning above.
            t_end = n_frames
        else:
            t_end = t_start + temp_length

        f_start = compute_f_start(f)
        f_end = compute_f_end(f)
        S[f_start:f_end, t_start:t_end] = self.spectrogram[f_start:f_end, t_start:t_end]
    return S
def __test_to_mel(infile):
    """Check ``librosa.hz_to_mel`` against the reference data in ``infile``.

    The loaded fixture supplies the input frequencies under ``'f'``, the
    ``htk`` flag under ``'htk'`` and the expected values under ``'result'``.
    """
    DATA = load(infile)
    # Pass ``htk`` by keyword: it is keyword-only in newer librosa releases,
    # and the keyword form is accepted by older ones as well.
    z = librosa.hz_to_mel(DATA['f'], htk=DATA['htk'])

    assert np.allclose(z, DATA['result'])
def test_hz_to_mel(infile):
    """Compare ``librosa.hz_to_mel`` with the reference values in ``infile``.

    The fixture loaded from ``infile`` provides the frequencies (``"f"``),
    the ``htk`` switch (``"htk"``) and the expected output (``"result"``).
    """
    fixture = load(infile)

    computed = librosa.hz_to_mel(fixture["f"], htk=fixture["htk"])
    expected = fixture["result"]

    assert np.allclose(computed, expected)
def hz_to_mel(y, *args, **kwargs):
    """Thin pass-through to ``librosa.hz_to_mel``.

    :param y: value(s) handed to ``librosa.hz_to_mel`` as its first argument.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged, so that
        options such as ``htk`` can be given by name.
    :return: whatever ``librosa.hz_to_mel`` returns.
    """
    return librosa.hz_to_mel(y, *args, **kwargs)
def get_audio(track_id):
    """Build the path of a track's mp3 file below ``data_path``.

    The id is zero-padded to six digits; the first three digits name the
    sub-directory and the full six digits name the file.
    """
    tid_str = '{:06d}'.format(track_id)
    return os.path.join(data_path, tid_str[:3], tid_str + '.mp3')


# Paths of all tracks selected by ``jazz_mask`` (value is only displayed
# when run as a notebook cell).
tracks[jazz_mask].index.map(get_audio).tolist()

# %%
import torch
import librosa
import numpy as np

tsr = 13000
y, sr = librosa.load(librosa.util.example_audio_file(), sr=tsr)
# NOTE(review): hz_to_mel is applied to the raw samples here, not to
# frequencies -- confirm this is intentional.
y = librosa.hz_to_mel(y)

D = librosa.stft(y, n_fft=1024)
print(D.shape)

# Split the complex STFT into log-magnitude and phase.
lmag = np.log(np.abs(D) + 1)
agl = np.angle(D)  # / np.pi

# Round-trip both parts through a stacked torch tensor.
lmag, agl = torch.from_numpy(lmag), torch.from_numpy(agl)
tensor = torch.stack((lmag, agl), 0)
tensor = tensor.squeeze()

mag = tensor[0, :, :].numpy()
agl = tensor[1, :, :].numpy()

# Undo the log(|D| + 1) compression and rebuild the complex STFT.
mag = np.exp(mag) - 1
# ``np.complex`` was only an alias of the builtin ``complex`` and no longer
# exists in current NumPy; the literal ``1j`` is the same value.
stft = mag * np.cos(agl) + (mag * np.sin(agl) * 1j)

y_hat = librosa.istft(stft)
y = librosa.mel_to_hz(y)
y_hat = librosa.mel_to_hz(y_hat)
# y = librosa.resample(y, sr, tsr)
def __call__(self, spect, sample_rate):
    """Zero the leading entries of ``spect`` below the cutoff ``self.freq``.

    The number of leading entries to clear is the length of ``spect``
    scaled by the ratio of ``self.freq`` to half the sample rate, both
    converted to mel.  ``spect`` is modified in place.

    NOTE(review): presumably ``spect`` is mel-spaced with frequency along
    its first axis -- confirm against the caller.

    :return: the (modified) ``spect`` and the unchanged ``sample_rate``.
    """
    cutoff_mel = librosa.hz_to_mel(self.freq)
    half_rate_mel = librosa.hz_to_mel(sample_rate / 2)

    rows_to_clear = int(len(spect) * cutoff_mel / half_rate_mel)
    spect[:rows_to_clear] = 0

    return spect, sample_rate
def transform_non_affine(self, a):
    """Scale ``a`` by 1000 and convert the result with ``librosa.hz_to_mel``.

    NOTE(review): the factor 1000 suggests ``a`` is given in kHz -- confirm.
    """
    scaled = a * 1000.0
    return librosa.hz_to_mel(scaled)
def test_hz2mel():
    """``hz2mel`` must agree with ``librosa.hz_to_mel(..., htk=True)``.

    Ten random values drawn from ``np.random.random`` serve as input.
    """
    samples = np.random.random(10)

    actual = hz2mel(samples)
    reference = librosa.hz_to_mel(samples, htk=True)

    assert np.allclose(actual, reference)