def test_batch_pitch(self):
    """Check that ``F.detect_pitch_frequency`` is consistent under batching.

    The waveform loaded from ``self.test_filepath`` is processed two ways:
    transformed first and then repeated three times along a new leading
    dimension, or repeated first and then transformed. Both results must
    have the same shape and be numerically close.
    """
    waveform, sample_rate = torchaudio.load(self.test_filepath)

    # Single then transform then batch
    expected = F.detect_pitch_frequency(waveform, sample_rate)
    expected = expected.unsqueeze(0).repeat(3, 1, 1)

    # Batch then transform
    batched_waveform = waveform.unsqueeze(0).repeat(3, 1, 1)
    computed = F.detect_pitch_frequency(batched_waveform, sample_rate)

    # assertEqual reports both operands on failure, so no hand-built
    # message tuple is needed as it was with assertTrue(a == b).
    self.assertEqual(computed.shape, expected.shape)
    self.assertTrue(
        torch.allclose(computed, expected),
        "batched pitch differs from per-sample pitch",
    )
def test_detect_pitch_frequency_pitch(self, frequency):
    """Check that the pitch detected on a pure sinusoid matches ``frequency``.

    A 5-unit-duration sinusoid is generated with ``get_sinusoid`` at a
    sample rate of 44100, and every detected pitch value must lie within
    a tolerance of 1 of the requested ``frequency``.
    """
    sample_rate = 44100
    threshold = 1

    sine = get_sinusoid(frequency=frequency, sample_rate=sample_rate, duration=5)
    detected = F.detect_pitch_frequency(sine, sample_rate)

    # Count the detected values that deviate from the target by more than
    # the tolerance; the test passes only when that count is zero.
    deviation = (detected - frequency).abs()
    num_out_of_tolerance = (deviation > threshold).sum()
    self.assertFalse(num_out_of_tolerance)
dct_type=2, norm='ortho') plot_spectrogram(mfcc_librosa) mse = torch.square(mfcc - mfcc_librosa).mean().item() print('Mean Square Difference: ', mse) ###################################################################### # Pitch # ----- # waveform, sample_rate = get_speech_sample() pitch = F.detect_pitch_frequency(waveform, sample_rate) plot_pitch(waveform, sample_rate, pitch) play_audio(waveform, sample_rate) ###################################################################### # Kaldi Pitch (beta) # ------------------ # # Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic # speech recognition (ASR) applications. This is a beta feature in ``torchaudio``, # and it is available only in ``functional``. # # 1. A pitch extraction algorithm tuned for automatic speech recognition # # P. Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. # Khudanpur
def func(tensor):
    """Run ``F.detect_pitch_frequency`` on ``tensor`` with a sample rate of 44100."""
    return F.detect_pitch_frequency(tensor, 44100)