Esempio n. 1
0
    def __init__(self, dataset_config):
        """Set up the featurizers and load the dataset entries.

        Args:
          dataset_config: DatasetConfig object. This method reads its
            `audio_config` (`sample_rate`, `window_ms`, `stride_ms`),
            `vocab_file_path` and `data_path` attributes.
        """
        self.config = dataset_config
        audio_conf = self.config.audio_config

        # Audio feature extractor, parameterized by the audio section of
        # the dataset config.
        audio_kwargs = {
            "sample_rate": audio_conf.sample_rate,
            "window_ms": audio_conf.window_ms,
            "stride_ms": audio_conf.stride_ms,
        }
        self.audio_featurizer = featurizer.AudioFeaturizer(**audio_kwargs)

        # Text feature extractor, built from the vocabulary file; its
        # label set is exposed directly on the dataset.
        text_feat = featurizer.TextFeaturizer(
            vocab_file=self.config.vocab_file_path)
        self.text_featurizer = text_feat
        self.speech_labels = text_feat.speech_labels

        # Dataset entries come from the module-level preprocessing helper.
        self.entries = _preprocess_data(self.config.data_path)

        # Hard-coded: the generated spectrogram will have 161 feature bins.
        self.num_feature_bins = 161
Esempio n. 2
0
    def __init__(self, dataset_config):
        """Set up the featurizers and precompute features and labels.

        Args:
          dataset_config: DatasetConfig object. This method reads its
            `audio_config` (`sample_rate`, `frame_length`, `frame_step`,
            `fft_length`) and `vocab_file_path` attributes, and passes the
            whole config on to `_preprocess_data`.
        """
        self.config = dataset_config
        audio_conf = self.config.audio_config

        # Audio feature extractor, parameterized by the audio section of
        # the dataset config.
        audio_kwargs = {
            "sample_rate": audio_conf.sample_rate,
            "frame_length": audio_conf.frame_length,
            "frame_step": audio_conf.frame_step,
            "fft_length": audio_conf.fft_length,
        }
        self.audio_featurizer = featurizer.AudioFeaturizer(**audio_kwargs)

        # Text feature extractor, built from the vocabulary file; its
        # label set is exposed directly on the dataset.
        text_feat = featurizer.TextFeaturizer(
            vocab_file=self.config.vocab_file_path)
        self.text_featurizer = text_feat
        self.speech_labels = text_feat.speech_labels

        # The preprocessing helper returns a (features, labels) pair.
        features, labels = _preprocess_data(
            self.config, self.audio_featurizer, text_feat.token_to_idx)
        self.features = features
        self.labels = labels

        # Take the bin count from the first feature's second dimension;
        # with no features at all there is nothing to measure.
        if len(features):
            self.num_feature_bins = features[0].shape[1]
        else:
            self.num_feature_bins = None