def __init__(self, dataset_config):
    """Set up featurizers and dataset entries from a dataset config.

    Args:
      dataset_config: DatasetConfig object. This method reads its
        `audio_config` (`sample_rate`, `window_ms`, `stride_ms`),
        `vocab_file_path` and `data_path` attributes.
    """
    self.config = dataset_config
    audio_cfg = self.config.audio_config

    # Audio side: the featurizer is parameterized by the sample rate and
    # the window/stride durations taken from the audio config.
    self.audio_featurizer = featurizer.AudioFeaturizer(
        sample_rate=audio_cfg.sample_rate,
        window_ms=audio_cfg.window_ms,
        stride_ms=audio_cfg.stride_ms,
    )

    # Text side: the featurizer is built from the vocabulary file, and its
    # label set is re-exposed directly on the dataset.
    text_featurizer = featurizer.TextFeaturizer(
        vocab_file=self.config.vocab_file_path
    )
    self.text_featurizer = text_featurizer
    self.speech_labels = text_featurizer.speech_labels

    # Dataset entries are whatever _preprocess_data returns for data_path.
    self.entries = _preprocess_data(self.config.data_path)

    # The generated spectrogram will have 161 feature bins.
    self.num_feature_bins = 161
def __init__(self, dataset_config):
    """Set up featurizers and preprocessed features/labels from a config.

    Args:
      dataset_config: DatasetConfig object. This method reads its
        `audio_config` (`sample_rate`, `frame_length`, `frame_step`,
        `fft_length`) and `vocab_file_path` attributes, and passes the
        whole config on to `_preprocess_data`.
    """
    self.config = dataset_config
    audio_cfg = self.config.audio_config

    # Audio side: the featurizer is parameterized by the sample rate and
    # the frame/FFT settings taken from the audio config.
    self.audio_featurizer = featurizer.AudioFeaturizer(
        sample_rate=audio_cfg.sample_rate,
        frame_length=audio_cfg.frame_length,
        frame_step=audio_cfg.frame_step,
        fft_length=audio_cfg.fft_length,
    )

    # Text side: the featurizer is built from the vocabulary file, and its
    # label set is re-exposed directly on the dataset.
    text_featurizer = featurizer.TextFeaturizer(
        vocab_file=self.config.vocab_file_path
    )
    self.text_featurizer = text_featurizer
    self.speech_labels = text_featurizer.speech_labels

    # Preprocessing receives the config, the audio featurizer and the
    # token-to-index mapping, and returns a (features, labels) pair.
    features, labels = _preprocess_data(
        self.config,
        self.audio_featurizer,
        text_featurizer.token_to_idx,
    )
    self.features = features
    self.labels = labels

    # The bin count is read from the second dimension of the first
    # feature; it stays None when there are no features at all.
    # len() is used rather than truthiness on purpose: the container
    # type of `features` is not visible here.
    if len(self.features):
        self.num_feature_bins = self.features[0].shape[1]
    else:
        self.num_feature_bins = None