def _get_2D_feature(audio, feature_type, audio_feature_dict, sampling_rate_in_hz):
    """Compute a 2D (time x frequency) feature from a 1D audio signal.

    :param audio: 1D audio signal.
    :param feature_type: one of 'stft_phase', 'stft' or 'group_delay'.
    :param audio_feature_dict: dict with required keys 'window_length_in_s'
        and 'window_shift_in_s', and optional keys 'num_fft_points' and
        'window_type'.
    :param sampling_rate_in_hz: sampling rate of the audio signal.
    :return: 2D feature produced by the corresponding helper.
    :raises ValueError: if feature_type is not recognized.
    """
    window_length_in_s = audio_feature_dict['window_length_in_s']
    window_shift_in_s = audio_feature_dict['window_shift_in_s']
    # Default the FFT size to the window length expressed in samples.
    if 'num_fft_points' in audio_feature_dict:
        num_fft_points = audio_feature_dict['num_fft_points']
    else:
        num_fft_points = get_length_in_samp(window_length_in_s,
                                            sampling_rate_in_hz)
    if 'window_type' in audio_feature_dict:
        window_type = audio_feature_dict['window_type']
    else:
        window_type = 'hamming'
    if feature_type == 'stft_phase':
        return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                        window_length_in_s, window_shift_in_s,
                                        num_fft_points, window_type)
    if feature_type == 'stft':
        return get_stft_magnitude(audio, sampling_rate_in_hz,
                                  window_length_in_s, window_shift_in_s,
                                  num_fft_points, window_type)
    if feature_type == 'group_delay':
        return get_group_delay(audio, sampling_rate_in_hz,
                               window_length_in_s, window_shift_in_s,
                               num_fft_points, window_type)
    # Previously an unrecognized feature_type silently returned None;
    # fail loudly instead so misconfiguration is caught at the call site.
    raise ValueError(
        'feature_type "{}" is not recognized.'.format(feature_type))
def _get_2D_feature(audio, feature_type, audio_feature_dict, sampling_rate_in_hz):
    """Compute a 2D (time x frequency) feature from a 1D audio signal.

    :param audio: 1D audio signal.
    :param feature_type: one of 'stft_phase', 'stft', 'group_delay' or 'fbank'.
    :param audio_feature_dict: dict with required keys 'window_length_in_s'
        and 'window_shift_in_s', and optional keys 'num_fft_points',
        'window_type' and (for 'fbank') 'num_filter_bands'.
    :param sampling_rate_in_hz: sampling rate of the audio signal.
    :return: 2D feature produced by the corresponding helper.
    :raises ValueError: if num_fft_points is smaller than the window length
        in samples, or if feature_type is not recognized.
    """
    window_length_in_s = audio_feature_dict["window_length_in_s"]
    window_shift_in_s = audio_feature_dict["window_shift_in_s"]
    window_length_in_samp = get_length_in_samp(window_length_in_s, sampling_rate_in_hz)
    if "num_fft_points" in audio_feature_dict:
        num_fft_points = audio_feature_dict["num_fft_points"]
        if num_fft_points < window_length_in_samp:
            # BUG FIX: the last two format args were swapped (the message
            # printed seconds where samples belonged and vice versa); the
            # closing parenthesis was also missing.
            raise ValueError("num_fft_points: {} < window length in "
                             "samples: {} (corresponds to window length"
                             " in s: {})".format(num_fft_points,
                                                 window_length_in_samp,
                                                 window_length_in_s))
    else:
        # Default the FFT size to the window length expressed in samples.
        num_fft_points = window_length_in_samp
    if "window_type" in audio_feature_dict:
        window_type = audio_feature_dict["window_type"]
    else:
        window_type = "hamming"
    if feature_type == "stft_phase":
        return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                        window_length_in_s, window_shift_in_s,
                                        num_fft_points, window_type)
    if feature_type == "stft":
        return get_stft_magnitude(audio, sampling_rate_in_hz,
                                  window_length_in_s, window_shift_in_s,
                                  num_fft_points, window_type)
    if feature_type == "group_delay":
        return get_group_delay(audio, sampling_rate_in_hz,
                               window_length_in_s, window_shift_in_s,
                               num_fft_points, window_type)
    if feature_type == "fbank":
        num_filter_bands = audio_feature_dict["num_filter_bands"]
        return get_fbank(
            audio,
            sampling_rate_in_hz,
            window_length_in_s,
            window_shift_in_s,
            num_fft_points,
            window_type,
            num_filter_bands,
        )
    # Previously an unrecognized feature_type silently returned None;
    # fail loudly instead so misconfiguration is caught at the call site.
    raise ValueError(
        'feature_type "{}" is not recognized.'.format(feature_type))
def _get_2D_feature(audio, feature_type, audio_feature_dict, sampling_rate_in_hz):
    """Compute a 2D (time x frequency) feature from a 1D audio signal.

    :param audio: 1D audio signal.
    :param feature_type: one of 'stft_phase', 'stft', 'group_delay' or 'fbank'.
    :param audio_feature_dict: dict with required keys 'window_length_in_s'
        and 'window_shift_in_s', and optional keys 'num_fft_points',
        'window_type' and (for 'fbank') 'num_filter_bands'.
    :param sampling_rate_in_hz: sampling rate of the audio signal.
    :return: 2D feature produced by the corresponding helper.
    :raises ValueError: if num_fft_points is smaller than the window length
        in samples, or if feature_type is not recognized.
    """
    window_length_in_s = audio_feature_dict['window_length_in_s']
    window_shift_in_s = audio_feature_dict['window_shift_in_s']
    window_length_in_samp = get_length_in_samp(window_length_in_s,
                                               sampling_rate_in_hz)
    if 'num_fft_points' in audio_feature_dict:
        num_fft_points = audio_feature_dict['num_fft_points']
        if num_fft_points < window_length_in_samp:
            # BUG FIX: the last two format args were swapped (the message
            # printed seconds where samples belonged and vice versa); the
            # closing parenthesis was also missing.
            raise ValueError(
                'num_fft_points: {} < window length in '
                'samples: {} (corresponds to window length'
                ' in s: {})'.format(num_fft_points,
                                    window_length_in_samp,
                                    window_length_in_s))
    else:
        # Default the FFT size to the window length expressed in samples.
        num_fft_points = window_length_in_samp
    if 'window_type' in audio_feature_dict:
        window_type = audio_feature_dict['window_type']
    else:
        window_type = 'hamming'
    if feature_type == 'stft_phase':
        return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                        window_length_in_s, window_shift_in_s,
                                        num_fft_points, window_type)
    if feature_type == 'stft':
        return get_stft_magnitude(audio, sampling_rate_in_hz,
                                  window_length_in_s, window_shift_in_s,
                                  num_fft_points, window_type)
    if feature_type == 'group_delay':
        return get_group_delay(audio, sampling_rate_in_hz,
                               window_length_in_s, window_shift_in_s,
                               num_fft_points, window_type)
    if feature_type == 'fbank':
        num_filter_bands = audio_feature_dict['num_filter_bands']
        return get_fbank(audio, sampling_rate_in_hz, window_length_in_s,
                         window_shift_in_s, num_fft_points, window_type,
                         num_filter_bands)
    # Previously an unrecognized feature_type silently returned None;
    # fail loudly instead so misconfiguration is caught at the call site.
    raise ValueError(
        'feature_type "{}" is not recognized.'.format(feature_type))
def _get_2D_feature(
    audio: torch.Tensor,
    feature_type: str,
    audio_feature_dict: Dict[str, Union[float, int, str]],
    sampling_rate_in_hz: int,
) -> torch.Tensor:
    """Compute a 2D (time x frequency) feature from a 1D audio signal.

    TorchScript-compatible: the `torch.jit.isinstance` asserts refine the
    Union-typed dict values so the scripted graph type-checks.

    :param audio: audio signal tensor (presumably 1D — confirm at call site).
    :param feature_type: one of 'stft_phase', 'stft', 'group_delay' or 'fbank'.
    :param audio_feature_dict: dict with required keys 'window_length_in_s'
        and 'window_shift_in_s', and optional keys 'num_fft_points',
        'window_type' and (for 'fbank') 'num_filter_bands'.
    :param sampling_rate_in_hz: sampling rate of the audio signal.
    :return: 2D feature tensor produced by the corresponding helper.
    :raises ValueError: if num_fft_points is smaller than the window length
        in samples, or if feature_type is not recognized.
    """
    window_length_in_s = audio_feature_dict["window_length_in_s"]
    window_shift_in_s = audio_feature_dict["window_shift_in_s"]
    assert torch.jit.isinstance(window_length_in_s, float)
    assert torch.jit.isinstance(window_shift_in_s, float)
    window_length_in_samp = get_length_in_samp(window_length_in_s, sampling_rate_in_hz)
    if "num_fft_points" in audio_feature_dict:
        num_fft_points = audio_feature_dict["num_fft_points"]
        assert torch.jit.isinstance(num_fft_points, int)
        if num_fft_points < window_length_in_samp:
            # BUG FIX: the last two format args were swapped (the message
            # printed seconds where samples belonged and vice versa); the
            # closing parenthesis was also missing.
            raise ValueError("num_fft_points: {} < window length in "
                             "samples: {} (corresponds to window length"
                             " in s: {})".format(num_fft_points,
                                                 window_length_in_samp,
                                                 window_length_in_s))
    else:
        # Default the FFT size to the window length expressed in samples.
        num_fft_points = window_length_in_samp
    if "window_type" in audio_feature_dict:
        window_type = audio_feature_dict["window_type"]
        assert torch.jit.isinstance(window_type, str)
    else:
        window_type = "hamming"
    if feature_type == "stft_phase":
        return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                        window_length_in_s, window_shift_in_s,
                                        num_fft_points, window_type)
    elif feature_type == "stft":
        return get_stft_magnitude(audio, sampling_rate_in_hz,
                                  window_length_in_s, window_shift_in_s,
                                  num_fft_points, window_type)
    elif feature_type == "group_delay":
        return get_group_delay(audio, sampling_rate_in_hz,
                               window_length_in_s, window_shift_in_s,
                               num_fft_points, window_type)
    elif feature_type == "fbank":
        num_filter_bands = audio_feature_dict["num_filter_bands"]
        assert torch.jit.isinstance(num_filter_bands, int)
        return get_fbank(
            audio,
            sampling_rate_in_hz,
            window_length_in_s,
            window_shift_in_s,
            num_fft_points,
            window_type,
            num_filter_bands,
        )
    else:
        raise ValueError(
            f'feature_type "{feature_type}" is not recognized.')