Example #1 (score: 0)
File: parser.py — Project: kldami/KoSpeech
    def parse_audio(self, audio_path: str, augment_method: int) -> Tensor:
        """
        Extract a feature vector from an audio file, optionally applying
        noise injection and/or SpecAugment.

        Args:
             audio_path (str): path of audio file
             augment_method (int): flag indicating which augmentation method to use.

        Returns: feature_vector
            - **feature_vector** (torch.FloatTensor): feature from audio file.
        """
        signal = load_audio(audio_path, self.del_silence)

        # Noise injection operates on the raw waveform, before feature extraction.
        if augment_method in (SpectrogramParser.NOISE_INJECTION, SpectrogramParser.HYBRID_AUGMENT):
            signal = self.noise_injector(signal)

        feature_vector = self.transforms(signal)

        # Utterance-level zero-mean / unit-variance normalization.
        if self.normalize:
            feature_vector = (feature_vector - feature_vector.mean()) / np.std(feature_vector)

        # Refer to "Sequence to Sequence Learning with Neural Network" paper.
        # The reversed slice has negative strides, so a contiguous copy is
        # required before handing it to torch.
        if self.input_reverse:
            reversed_vector = feature_vector[:, ::-1]
            feature_vector = FloatTensor(np.ascontiguousarray(np.swapaxes(reversed_vector, 0, 1)))
        else:
            feature_vector = FloatTensor(feature_vector).transpose(0, 1)

        # SpecAugment operates on the extracted feature, after tensor conversion.
        if augment_method in (SpectrogramParser.SPEC_AUGMENT, SpectrogramParser.HYBRID_AUGMENT):
            feature_vector = self.spec_augment(feature_vector)

        return feature_vector
Example #2 (score: 0)
def parse_audio(audio_path: str, del_silence: bool = True) -> Tensor:
    """
    Extract an 80-band Kaldi-style log-mel filterbank feature from an audio file.

    Args:
        audio_path (str): path of the audio file
        del_silence (bool): whether to remove silent sections from the signal

    Returns:
        Tensor: mean-normalized fbank feature, shape (num_frames, num_mel_bins)
    """
    signal = load_audio(audio_path, del_silence)

    # Kaldi-compatible fbank: 20 ms Hamming window, 10 ms shift, 80 mel bins.
    feature_vector = torchaudio.compliance.kaldi.fbank(
        Tensor(signal).unsqueeze(0),
        num_mel_bins=80,
        frame_length=20,
        frame_shift=10,
        window_type='hamming')

    # The original code detoured through numpy with two .transpose(0, 1)
    # calls that cancel each other out; since the subtracted mean is a
    # scalar, normalizing the tensor directly is equivalent and avoids the
    # redundant tensor -> numpy -> tensor roundtrip.
    feature_vector -= feature_vector.mean()

    return feature_vector
def parse_audio(audio_path: str, del_silence: bool = True) -> Tensor:
    """
    Extract a time-reversed 40-dimensional MFCC feature from an audio file.

    Args:
        audio_path (str): path of the audio file
        del_silence (bool): whether to remove silent sections from the signal

    Returns:
        torch.FloatTensor: mean-normalized, time-reversed MFCC feature,
        shape (time, n_mfcc)
    """
    signal = load_audio(audio_path, del_silence)
    # Assumes 16 kHz audio: n_fft=320 is a 20 ms window, hop_length=160 a 10 ms shift.
    mfcc = librosa.feature.mfcc(y=signal,
                                sr=16000,
                                n_mfcc=40,
                                n_fft=320,
                                hop_length=160)

    mfcc -= mfcc.mean()

    # BUG FIX: the original converted to a torch Tensor first and then sliced
    # with [:, ::-1], which raises ValueError — torch tensors do not support
    # negative strides — and would have reversed the wrong axis anyway.
    # Reverse the time axis while still in numpy (mfcc is (n_mfcc, time)
    # here), then make a contiguous copy and swap axes to (time, n_mfcc).
    mfcc = mfcc[:, ::-1]

    return torch.FloatTensor(np.ascontiguousarray(np.swapaxes(mfcc, 0, 1)))
Example #4 (score: 0)
def parse_audio(audio_path: str,
                del_silence: bool = False,
                audio_extension: str = 'pcm') -> Tensor:
    """
    Load an audio file and extract a normalized 80-band log-mel filterbank feature.

    Args:
        audio_path (str): path of the audio file
        del_silence (bool): whether to remove silent sections from the signal
        audio_extension (str): extension/format of the audio file

    Returns:
        torch.FloatTensor: normalized fbank feature, shape (time, num_mel_bins)
    """
    signal = load_audio(audio_path, del_silence, extension=audio_extension)

    # Kaldi-style fbank: 20 ms Hamming window, 10 ms shift, 80 mel bins.
    waveform = Tensor(signal).unsqueeze(0)
    fbank = torchaudio.compliance.kaldi.fbank(waveform=waveform,
                                              num_mel_bins=80,
                                              frame_length=20,
                                              frame_shift=10,
                                              window_type='hamming')
    feature = fbank.transpose(0, 1).numpy()

    # Utterance-level zero-mean / unit-variance normalization.
    feature = (feature - feature.mean()) / np.std(feature)

    return torch.FloatTensor(feature).transpose(0, 1)
Example #5 (score: 0)
File: parser.py — Project: hwiorn/KoSpeech
    def parse_audio(self, audio_path: str, augment_method: int) -> Tensor:
        """
        Parses audio into a feature tensor.

        Args:
             audio_path (str): path of audio file
             augment_method (int): flag indicating which augmentation method to use.

        Returns: feature
            - **feature** (torch.FloatTensor): feature from audio file, or
              ``None`` when the audio could not be loaded.
        """
        signal = load_audio(audio_path,
                            self.del_silence,
                            extension=self.audio_extension)

        # load_audio signals failure by returning None; log and propagate it
        # so the caller can skip the sample instead of crashing here.
        if signal is None:
            logger.info("Audio is None : {0}".format(audio_path))
            return None

        feature = self.transforms(signal)

        # Utterance-level zero-mean / unit-variance normalization.
        # Presumably `feature` is a numpy array at this point (np.std and the
        # ::-1 slice below both require it) — TODO confirm self.transforms.
        if self.normalize:
            feature -= feature.mean()
            feature /= np.std(feature)

        # Refer to "Sequence to Sequence Learning with Neural Network" paper
        # The reversed slice has negative strides, which torch tensors cannot
        # hold — hence the np.ascontiguousarray copy before conversion.
        if self.input_reverse:
            feature = feature[:, ::-1]
            feature = FloatTensor(
                np.ascontiguousarray(np.swapaxes(feature, 0, 1)))
        else:
            feature = FloatTensor(feature).transpose(0, 1)

        # Only SpecAugment is applied here, after conversion to a tensor.
        if augment_method == SpectrogramParser.SPEC_AUGMENT:
            feature = self.spec_augment(feature)

        return feature