Exemple #1
0
def extract_fbank_features(
    waveform,
    sample_rate: int,
    output_path: Optional[Path] = None,
    n_mel_bins: int = 80,
    overwrite: bool = False,
):
    """Compute filterbank features for a waveform and save or return them.

    The Kaldi-based extractor (``_get_kaldi_fbank``) is tried first; if it
    yields ``None`` the torchaudio-based one (``_get_torchaudio_fbank``) is
    used instead.

    Args:
        waveform: Audio samples. Must support ``* int``, ``.squeeze()`` and
            ``.numpy()`` (presumably a float ``torch.Tensor`` scaled to
            [-1, 1] -- confirm against callers).
        sample_rate: Sampling rate, forwarded to the extractors.
        output_path: Where to store the features with ``np.save``. When
            ``None`` the features are returned instead of being written.
        n_mel_bins: Number of mel bins, forwarded to the extractors.
        overwrite: When false, nothing is computed if the output file
            already exists.

    Returns:
        The feature array when ``output_path`` is ``None``; otherwise
        ``None`` (both when the file was written and when it was skipped).

    Raises:
        ImportError: If neither extractor produced features.
    """
    if output_path is not None and not overwrite:
        # np.save() appends ".npy" when the file name lacks it, so a previous
        # call may have written to a different path than the one given here.
        # Check both, otherwise the guard never triggers for such paths.
        posix_path = output_path.as_posix()
        if posix_path.endswith(".npy"):
            saved_path = output_path
        else:
            saved_path = Path(posix_path + ".npy")
        if output_path.is_file() or saved_path.is_file():
            return None

    _waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integers
    _waveform = _waveform.squeeze().numpy()

    features = _get_kaldi_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        # Kaldi backend unavailable: fall back to torchaudio.
        features = _get_torchaudio_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        raise ImportError(
            "Please install pyKaldi or torchaudio to enable fbank feature extraction"
        )

    if output_path is None:
        return features
    np.save(output_path.as_posix(), features)
    return None
Exemple #2
0
def extract_fbank_features(
    waveform,
    sample_rate,
    output_path=None,
    n_mel_bins=80,
    apply_utterance_cmvn=True,
    overwrite=False,
):
    """Compute filterbank features for a waveform and save or return them.

    The Kaldi-based extractor (``_get_kaldi_fbank``) is tried first; if it
    yields ``None`` the torchaudio-based one (``_get_torchaudio_fbank``) is
    used instead. The result is optionally passed through ``UtteranceCMVN``
    with both ``norm_means`` and ``norm_vars`` enabled.

    Args:
        waveform: Audio samples. Must support ``* int``, ``.squeeze()`` and
            ``.numpy()`` (presumably a float ``torch.Tensor`` scaled to
            [-1, 1] -- confirm against callers).
        sample_rate: Sampling rate, forwarded to the extractors.
        output_path: Where to store the features with ``np.save``. When
            ``None`` the features are returned instead of being written.
        n_mel_bins: Number of mel bins, forwarded to the extractors.
        apply_utterance_cmvn: Whether to apply ``UtteranceCMVN`` to the
            extracted features.
        overwrite: When false, nothing is computed if the output file
            already exists.

    Returns:
        The feature array when ``output_path`` is ``None``; otherwise
        ``None`` (both when the file was written and when it was skipped).

    Raises:
        ImportError: If neither extractor produced features.
    """
    if output_path is not None and not overwrite:
        if op.exists(output_path):
            return None
        # np.save() appends ".npy" to string/path-like targets that lack it,
        # so a previous call may have written next to the path given here.
        # Check that file too, otherwise the guard never triggers.
        import os  # local import: only the ``op`` alias is known to be in scope

        try:
            target = os.fspath(output_path)
        except TypeError:
            target = None
        if (
            isinstance(target, str)
            and not target.endswith(".npy")
            and op.exists(target + ".npy")
        ):
            return None

    _waveform = waveform * (2**15)  # Kaldi compliance: 16-bit signed integers
    _waveform = _waveform.squeeze().numpy()

    features = _get_kaldi_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        # Kaldi backend unavailable: fall back to torchaudio.
        features = _get_torchaudio_fbank(_waveform, sample_rate, n_mel_bins)
    if features is None:
        raise ImportError("Please install pyKaldi or torchaudio to enable "
                          "online filterbank feature extraction")

    if apply_utterance_cmvn:
        cmvn = UtteranceCMVN(norm_means=True, norm_vars=True)
        features = cmvn(features)

    if output_path is None:
        return features
    np.save(output_path, features)
    return None