Python FilterbankFeatures 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: nemo.collections.asr.parts.features

클래스/타입: FilterbankFeatures

hotexamples.com에 등록된 예제 수: 4

Python FilterbankFeatures - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python nemo.collections.asr.parts.features.FilterbankFeatures의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 / 숨기기

FilterbankFeatures(4)

자주 사용되는 메소드들

FilterbankFeatures (4)

예제 #1

파일 보기

    def test_random_stft_sizes(self):
        """Compare FilterbankFeatures output lengths against librosa for random STFT sizes.

        Five random configurations are tried with ``exact_pad=False`` and then
        five more with ``exact_pad=True``. Every trial asserts that

        * axis 2 of the returned features equals the returned length, and
        * axis 2 of the returned features equals axis 1 of ``librosa.stft``
          computed on the same signal with the same FFT/window/hop sizes.
        """
        # The order of random draws inside a trial is kept fixed (nfft, window,
        # hop, audio length, then the signal) so seeded runs stay reproducible.
        for exact_pad in (False, True):
            for _ in range(5):
                nfft = random.randint(128, 2048)
                window_size = random.randint(128, nfft)
                hop_size = random.randint(64, window_size)
                featurizer = FilterbankFeatures(
                    exact_pad=exact_pad,
                    pad_to=1,
                    n_fft=nfft,
                    n_window_size=window_size,
                    n_window_stride=hop_size,
                )

                audio_length = random.randint(nfft, 2 ** 16)
                signal = torch.randn(1, audio_length)
                signal_len = torch.tensor([audio_length])
                result, result_len = featurizer(signal, signal_len)
                assert (
                    result.shape[2] == result_len[0]
                ), f"{result.shape} != {result_len}: {nfft}, {window_size}, {hop_size}, {audio_length}"

                samples = signal.cpu().detach().numpy().squeeze()
                if exact_pad:
                    # Reference for exact padding: reflect-pad by half of
                    # (window - hop) on each side and disable librosa's own
                    # centring.
                    half_overlap = int((window_size - hop_size) // 2)
                    samples = np.pad(samples, half_overlap, mode="reflect")
                    spec = librosa.stft(
                        samples, n_fft=nfft, hop_length=hop_size, win_length=window_size, center=False
                    )
                else:
                    spec = librosa.stft(samples, n_fft=nfft, hop_length=hop_size, win_length=window_size)

                assert (
                    spec.shape[1] == result.shape[2]
                ), f"{result.shape} != {spec.shape}: {nfft}, {window_size}, {hop_size}, {audio_length}"

예제 #2

파일 보기

    def test_seq_len(self):
        """Check the output length of a default-sized featurizer on an 800-sample signal.

        Asserts that axis 2 of the returned features equals the returned
        length, and that it equals axis 1 of a ``librosa.stft`` reference.
        """
        num_samples = 800
        featurizer = FilterbankFeatures(exact_pad=False, pad_to=1)
        signal = torch.randn(1, num_samples)
        signal_len = torch.tensor([num_samples])

        result, result_len = featurizer(signal, signal_len)
        assert result.shape[2] == result_len[0], f"{result.shape} != {result_len}"

        # NOTE(review): these sizes presumably mirror the FilterbankFeatures
        # defaults; confirm against the class definition.
        reference_stft = {"n_fft": 512, "hop_length": 160, "win_length": 320}
        samples = signal.cpu().detach().numpy().squeeze()
        spec = librosa.stft(samples, **reference_stft)

        assert spec.shape[1] == result.shape[2], f"{result.shape} != {spec.shape}"

예제 #3

파일 보기

파일: synthesis_utils.py 프로젝트: AndreevP/speech_distances

def make_preprocessor_trainable(stt):
    """Replace ``stt``'s featurizer with a ``FilterbankFeatures(use_grads=True)``.

    The replacement is loaded with the previous featurizer's ``state_dict``.
    It then receives every instance attribute of the previous featurizer whose
    name does not start with an underscore, except ``forward``.

    Args:
        stt: Object exposing ``preprocessor.featurizer``; it is modified in
            place.

    Returns:
        The same ``stt`` object, for call chaining.
    """
    old_featurizer = stt.preprocessor.featurizer

    # Snapshot the public attributes and the state before the featurizer is
    # replaced. ``forward`` is skipped so the new instance keeps its own.
    public_attrs = {
        name: value
        for name, value in old_featurizer.__dict__.items()
        if not name.startswith('_') and name != 'forward'
    }
    saved_state = old_featurizer.state_dict()

    stt.preprocessor.featurizer = FilterbankFeatures(use_grads=True)
    new_featurizer = stt.preprocessor.featurizer
    new_featurizer.load_state_dict(saved_state)

    # A plain loop, because setattr is called purely for its side effect.
    for name, value in public_attrs.items():
        setattr(new_featurizer, name, value)

    # NOTE: the model is not moved to any device here; a `.cuda()` call was
    # previously disabled at this point.
    return stt

예제 #4

파일 보기

    def __init__(
        self,
        sample_rate=16000,
        window_size=0.02,
        window_stride=0.01,
        n_window_size=None,
        n_window_stride=None,
        window="hann",
        normalize="per_feature",
        n_fft=None,
        preemph=0.97,
        features=64,
        lowfreq=0,
        highfreq=None,
        log=True,
        log_zero_guard_type="add",
        log_zero_guard_value=2 ** -24,
        dither=1e-5,
        pad_to=16,
        frame_splicing=1,
        stft_exact_pad=False,
        stft_conv=False,
        pad_value=0,
        mag_power=2.0,
    ):
        """Validate the window arguments and build the ``FilterbankFeatures`` featurizer.

        The window length and stride can each be given in one of two forms:
        ``window_size`` / ``window_stride``, which are multiplied by
        ``sample_rate`` and truncated to an int, or ``n_window_size`` /
        ``n_window_stride``, which are used as given. Supplying both forms of
        the same quantity is an error. Note that the first form has non-zero
        defaults, so a caller using ``n_window_size`` or ``n_window_stride``
        must also pass a falsy ``window_size`` or ``window_stride``.

        ``features`` is forwarded to ``FilterbankFeatures`` as ``nfilt``; all
        other arguments are forwarded under their own names.

        Raises:
            ValueError: If both ``window_size`` and ``n_window_size`` are
                truthy, or both ``window_stride`` and ``n_window_stride`` are.
        """
        # NOTE(review): the base initializer receives the caller-supplied
        # n_window_size / n_window_stride (None by default), not the values
        # derived from window_size / window_stride below. Confirm this is
        # intended.
        super().__init__(n_window_size, n_window_stride)

        self._sample_rate = sample_rate

        # Reject ambiguous specifications; the size check runs first.
        if window_size and n_window_size:
            raise ValueError(f"{self} received both window_size and " f"n_window_size. Only one should be specified.")
        if window_stride and n_window_stride:
            raise ValueError(
                f"{self} received both window_stride and " f"n_window_stride. Only one should be specified."
            )

        # Convert the sample-rate-relative values to sample counts when given.
        n_window_size = int(window_size * self._sample_rate) if window_size else n_window_size
        n_window_stride = int(window_stride * self._sample_rate) if window_stride else n_window_stride

        featurizer_kwargs = {
            "sample_rate": self._sample_rate,
            "n_window_size": n_window_size,
            "n_window_stride": n_window_stride,
            "window": window,
            "normalize": normalize,
            "n_fft": n_fft,
            "preemph": preemph,
            "nfilt": features,
            "lowfreq": lowfreq,
            "highfreq": highfreq,
            "log": log,
            "log_zero_guard_type": log_zero_guard_type,
            "log_zero_guard_value": log_zero_guard_value,
            "dither": dither,
            "pad_to": pad_to,
            "frame_splicing": frame_splicing,
            "stft_exact_pad": stft_exact_pad,
            "stft_conv": stft_conv,
            "pad_value": pad_value,
            "mag_power": mag_power,
        }
        self.featurizer = FilterbankFeatures(**featurizer_kwargs)