def _setup_dataloader_from_config(self, config: Optional[Dict]):
    """Build a DataLoader over an ``AudioLabelDataset`` from a dataset config.

    Args:
        config: Dataset/dataloader config dict. Requires `manifest_filepath`,
            `labels`, `sample_rate`, `batch_size` and `shuffle`; all other
            keys are optional and fall back to the defaults below.

    Returns:
        A ``torch.utils.data.DataLoader``, or None when `manifest_filepath`
        is not provided.
    """
    if config.get('manifest_filepath') is None:
        # Consistency fix: the sibling setup methods in this file log a
        # warning here instead of silently returning.
        logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}")
        return None

    if 'augmentor' in config:
        augmentor = process_augmentations(config['augmentor'])
    else:
        augmentor = None

    featurizer = WaveformFeaturizer(
        sample_rate=config['sample_rate'], int_values=config.get('int_values', False), augmentor=augmentor
    )

    dataset = AudioLabelDataset(
        manifest_filepath=config['manifest_filepath'],
        labels=config['labels'],
        featurizer=featurizer,
        max_duration=config.get('max_duration', None),
        min_duration=config.get('min_duration', None),
        trim=config.get('trim_silence', True),
        load_audio=config.get('load_audio', True),
    )

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['batch_size'],
        collate_fn=dataset.collate_fn,
        drop_last=config.get('drop_last', False),
        shuffle=config['shuffle'],
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, cfg: DictConfig):
    """Build a DataLoader over an ``_AudioTextDataset`` from a dataset config.

    Args:
        cfg: Dataset/dataloader config. Requires `manifest_filepath`,
            `sample_rate`, `batch_size` and `shuffle`; other keys are
            optional with the defaults below.

    Returns:
        A ``torch.utils.data.DataLoader``, or None when `manifest_filepath`
        is present but None.
    """
    if 'manifest_filepath' in cfg and cfg['manifest_filepath'] is None:
        logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {cfg}")
        return None

    if 'augmentor' in cfg:
        augmentor = process_augmentations(cfg['augmentor'])
    else:
        augmentor = None

    dataset = _AudioTextDataset(
        manifest_filepath=cfg['manifest_filepath'],
        parser=self.parser,
        sample_rate=cfg['sample_rate'],
        int_values=cfg.get('int_values', False),
        augmentor=augmentor,
        max_duration=cfg.get('max_duration', None),
        min_duration=cfg.get('min_duration', None),
        max_utts=cfg.get('max_utts', 0),
        trim=cfg.get('trim_silence', True),
        load_audio=cfg.get('load_audio', True),
        add_misc=cfg.get('add_misc', False),
    )

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=cfg['batch_size'],
        collate_fn=dataset.collate_fn,
        drop_last=cfg.get('drop_last', False),
        shuffle=cfg['shuffle'],
        num_workers=cfg.get('num_workers', 0),
        # Consistency fix: every sibling setup method exposes pin_memory;
        # the default (False) matches DataLoader's own default, so existing
        # callers are unaffected.
        pin_memory=cfg.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, config: Optional[Dict]):
    """Instantiate a character-level audio-to-text dataset and wrap it in a loader.

    Depending on ``config``, this builds one of:
      * a DALI pipeline (returned directly, no DataLoader wrapper),
      * a tarred (webdataset-style) dataset, or
      * a regular manifest-based dataset.

    Returns a ``torch.utils.data.DataLoader`` (or the DALI dataset object),
    or None when the required manifest/tar paths are missing.
    """
    augmentor = process_augmentations(config['augmentor']) if 'augmentor' in config else None

    shuffle = config['shuffle']
    device = 'gpu' if torch.cuda.is_available() else 'cpu'

    # DALI path: the dataset object itself acts as the iterator, so it is
    # returned without a DataLoader wrapper.
    if config.get('use_dali', False):
        return audio_to_text_dataset.get_dali_char_dataset(
            config=config,
            shuffle=shuffle,
            device_id=self.local_rank if device == 'gpu' else None,
            global_rank=self.global_rank,
            world_size=self.world_size,
            preprocessor_cfg=self._cfg.preprocessor,
        )

    if config.get('is_tarred', False):
        missing_tars = 'tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None
        missing_manifest = 'manifest_filepath' in config and config['manifest_filepath'] is None
        if missing_tars or missing_manifest:
            logging.warning(
                "Could not load dataset as `manifest_filepath` was None or "
                f"`tarred_audio_filepaths` is None. Provided config : {config}"
            )
            return None

        # Shard-level shuffle buffer; disabled entirely when not shuffling.
        shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0
        dataset = audio_to_text_dataset.get_tarred_char_dataset(
            config=config,
            shuffle_n=shuffle_n,
            global_rank=self.global_rank,
            world_size=self.world_size,
            augmentor=augmentor,
        )
        # Tarred datasets handle their own shuffling; the loader must not.
        shuffle = False
    else:
        if 'manifest_filepath' in config and config['manifest_filepath'] is None:
            logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}")
            return None
        dataset = audio_to_text_dataset.get_char_dataset(config=config, augmentor=augmentor)

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['batch_size'],
        collate_fn=dataset.collate_fn,
        drop_last=config.get('drop_last', False),
        shuffle=shuffle,
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, config: Optional[Dict]):
    """Instantiate a BPE-tokenized audio-to-text dataset and wrap it in a loader.

    Builds either a tarred (webdataset-style) dataset or a regular
    manifest-based dataset, then returns a ``torch.utils.data.DataLoader``
    over it. Returns None when the required manifest/tar paths are missing.
    """
    augmentor = process_augmentations(config['augmentor']) if 'augmentor' in config else None

    shuffle = config['shuffle']

    if config.get('is_tarred', False):
        missing_tars = 'tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None
        missing_manifest = 'manifest_filepath' in config and config['manifest_filepath'] is None
        if missing_tars or missing_manifest:
            logging.warning(
                "Could not load dataset as `manifest_filepath` was None or "
                f"`tarred_audio_filepaths` is None. Provided config : {config}"
            )
            return None

        # Shard-level shuffle buffer; disabled entirely when not shuffling.
        shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0
        dataset = audio_to_text_dataset.get_tarred_bpe_dataset(
            config=config,
            tokenizer=self.tokenizer,
            shuffle_n=shuffle_n,
            global_rank=self.global_rank,
            world_size=self.world_size,
            augmentor=augmentor,
        )
        # Tarred datasets handle their own shuffling; the loader must not.
        shuffle = False
    else:
        if 'manifest_filepath' in config and config['manifest_filepath'] is None:
            logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}")
            return None
        dataset = audio_to_text_dataset.get_bpe_dataset(config=config, tokenizer=self.tokenizer, augmentor=augmentor)

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['batch_size'],
        collate_fn=dataset.collate_fn,
        drop_last=config.get('drop_last', False),
        shuffle=shuffle,
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, config: Optional[Dict]):
    """Build a DataLoader for speech-label data, optionally in streaming VAD mode.

    When `vad_stream` is truthy, a frame-level streaming VAD dataset is used
    with batch_size forced to 1 and a frame-sequence collate function;
    otherwise a regular ``AudioLabelDataset`` is used.

    Args:
        config: Dataset/dataloader config dict. Requires `manifest_filepath`,
            `labels`, `sample_rate`, `batch_size` and `shuffle`.

    Returns:
        A ``torch.utils.data.DataLoader``, or None when `manifest_filepath`
        is not provided.
    """
    if config.get('manifest_filepath') is None:
        # Consistency fix: the sibling setup methods in this file log a
        # warning here instead of silently returning.
        logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}")
        return None

    if 'augmentor' in config:
        augmentor = process_augmentations(config['augmentor'])
    else:
        augmentor = None

    featurizer = WaveformFeaturizer(
        sample_rate=config['sample_rate'], int_values=config.get('int_values', False), augmentor=augmentor
    )

    if 'vad_stream' in config and config['vad_stream']:
        # Consistency fix: use the project logger instead of a bare print(),
        # matching the identical message emitted elsewhere in this file.
        logging.info("Perform streaming frame-level VAD")
        dataset = AudioToSpeechLabelDataSet(
            manifest_filepath=config['manifest_filepath'],
            labels=config['labels'],
            featurizer=featurizer,
            max_duration=config.get('max_duration', None),
            min_duration=config.get('min_duration', None),
            trim=config.get('trim_silence', True),
            load_audio=config.get('load_audio', True),
            time_length=config.get('time_length', 0.31),
            shift_length=config.get('shift_length', 0.01),
        )
        # Streaming VAD yields variable-length frame sequences: one utterance
        # per batch, with a dedicated collate function.
        batch_size = 1
        collate_func = dataset.vad_frame_seq_collate_fn
    else:
        dataset = AudioLabelDataset(
            manifest_filepath=config['manifest_filepath'],
            labels=config['labels'],
            featurizer=featurizer,
            max_duration=config.get('max_duration', None),
            min_duration=config.get('min_duration', None),
            trim=config.get('trim_silence', True),
            load_audio=config.get('load_audio', True),
        )
        batch_size = config['batch_size']
        collate_func = dataset.collate_fn

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        collate_fn=collate_func,
        drop_last=config.get('drop_last', False),
        shuffle=config['shuffle'],
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def __setup_dataloader_from_config(self, config: Optional[Dict]):
    """Build a DataLoader over speaker-label data for diarization or identification.

    Stores the dataset on ``self.dataset`` as a side effect. The collate
    function, batch size and shuffle behavior depend on ``self.task``:
    diarization uses sliced sequences with batch_size 1 and no shuffling,
    identification uses fixed-length sequences with the configured values.
    """
    augmentor = process_augmentations(config['augmentor']) if 'augmentor' in config else None

    featurizer = WaveformFeaturizer(
        sample_rate=config['sample_rate'], int_values=config.get('int_values', False), augmentor=augmentor
    )

    # Kept on self so other methods can access the dataset after setup.
    self.dataset = AudioToSpeechLabelDataset(
        manifest_filepath=config['manifest_filepath'],
        labels=config['labels'],
        featurizer=featurizer,
        max_duration=config.get('max_duration', None),
        min_duration=config.get('min_duration', None),
        trim=False,
        load_audio=config.get('load_audio', True),
        time_length=config.get('time_length', 8),
        shift_length=config.get('shift_length', 0.75),
    )

    if self.task == 'diarization':
        logging.info("Setting up diarization parameters")
        # Sliced sequences vary in length, so force one utterance per batch
        # and keep ordering deterministic.
        _collate_func = self.dataset.sliced_seq_collate_fn
        batch_size = 1
        shuffle = False
    else:
        logging.info("Setting up identification parameters")
        _collate_func = self.dataset.fixed_seq_collate_fn
        batch_size = config['batch_size']
        shuffle = config.get('shuffle', False)

    return torch.utils.data.DataLoader(
        dataset=self.dataset,
        batch_size=batch_size,
        collate_fn=_collate_func,
        drop_last=config.get('drop_last', False),
        shuffle=shuffle,
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, config: Optional[Dict]):
    """Build a DataLoader over a BPE-tokenized audio-text dataset.

    Builds either a ``TarredAudioToBPEDataset`` (webdataset-style shards) or
    a regular ``AudioToBPEDataset`` from a manifest, then wraps it in a
    ``torch.utils.data.DataLoader``. Returns None when the required
    manifest/tar paths are missing.
    """
    if 'augmentor' in config:
        augmentor = process_augmentations(config['augmentor'])
    else:
        augmentor = None

    shuffle = config['shuffle']

    # Instantiate tarred dataset loader or normal dataset loader
    if config.get('is_tarred', False):
        if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or (
            'manifest_filepath' in config and config['manifest_filepath'] is None
        ):
            logging.warning(
                "Could not load dataset as `manifest_filepath` was None or "
                f"`tarred_audio_filepaths` is None. Provided config : {config}"
            )
            return None

        # Fix: only enable the shard shuffle buffer when shuffling is
        # requested — the sibling setup methods in this file all guard
        # shuffle_n with `if shuffle else 0`; this one did not, so tar
        # shards were shuffled even with shuffle=False.
        shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0
        dataset = TarredAudioToBPEDataset(
            audio_tar_filepaths=config['tarred_audio_filepaths'],
            manifest_filepath=config['manifest_filepath'],
            tokenizer=self.tokenizer,
            sample_rate=config['sample_rate'],
            int_values=config.get('int_values', False),
            augmentor=augmentor,
            shuffle_n=shuffle_n,
            max_duration=config.get('max_duration', None),
            min_duration=config.get('min_duration', None),
            max_utts=config.get('max_utts', 0),
            trim=config.get('trim_silence', True),
            add_misc=config.get('add_misc', False),
            global_rank=self.global_rank,
            world_size=self.world_size,
        )
        # Tarred datasets handle their own shuffling; the loader must not.
        shuffle = False
    else:
        if 'manifest_filepath' in config and config['manifest_filepath'] is None:
            logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}")
            return None
        dataset = AudioToBPEDataset(
            manifest_filepath=config['manifest_filepath'],
            tokenizer=self.tokenizer,
            sample_rate=config['sample_rate'],
            int_values=config.get('int_values', False),
            augmentor=augmentor,
            max_duration=config.get('max_duration', None),
            min_duration=config.get('min_duration', None),
            max_utts=config.get('max_utts', 0),
            trim=config.get('trim_silence', True),
            load_audio=config.get('load_audio', True),
            add_misc=config.get('add_misc', False),
        )

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=config['batch_size'],
        collate_fn=dataset.collate_fn,
        drop_last=config.get('drop_last', False),
        shuffle=shuffle,
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )
def _setup_dataloader_from_config(self, config: DictConfig):
    """Build a DataLoader for classification/VAD label data from a dataset config.

    Supports tarred classification datasets, streaming frame-level VAD
    datasets (batch_size forced to 1), and regular classification label
    datasets. Returns None when required paths are missing or when an
    unsupported combination (VAD + tarred) is requested.
    """
    # Temporarily open the struct so the regression-task flag can be
    # propagated into the dataset config.
    OmegaConf.set_struct(config, False)
    config.is_regression_task = self.is_regression_task
    OmegaConf.set_struct(config, True)

    augmentor = process_augmentations(config['augmentor']) if 'augmentor' in config else None

    featurizer = WaveformFeaturizer(
        sample_rate=config['sample_rate'], int_values=config.get('int_values', False), augmentor=augmentor
    )

    shuffle = config['shuffle']

    if config.get('is_tarred', False):
        missing_tars = 'tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None
        missing_manifest = 'manifest_filepath' in config and config['manifest_filepath'] is None
        if missing_tars or missing_manifest:
            logging.warning(
                "Could not load dataset as `manifest_filepath` is None or "
                f"`tarred_audio_filepaths` is None. Provided config : {config}"
            )
            return None
        if 'vad_stream' in config and config['vad_stream']:
            logging.warning("VAD inference does not support tarred dataset now")
            return None

        # Shard-level shuffle buffer; disabled entirely when not shuffling.
        shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0
        dataset = audio_to_label_dataset.get_tarred_classification_label_dataset(
            featurizer=featurizer,
            config=OmegaConf.to_container(config),
            shuffle_n=shuffle_n,
            global_rank=self.global_rank,
            world_size=self.world_size,
        )
        # Tarred datasets handle their own shuffling; the loader must not.
        shuffle = False
        batch_size = config['batch_size']
        collate_func = dataset.collate_fn
    else:
        if 'manifest_filepath' in config and config['manifest_filepath'] is None:
            logging.warning(f"Could not load dataset as `manifest_filepath` is None. Provided config : {config}")
            return None
        if 'vad_stream' in config and config['vad_stream']:
            logging.info("Perform streaming frame-level VAD")
            dataset = audio_to_label_dataset.get_speech_label_dataset(
                featurizer=featurizer, config=OmegaConf.to_container(config)
            )
            # Streaming VAD yields variable-length frame sequences: one
            # utterance per batch, with a dedicated collate function.
            batch_size = 1
            collate_func = dataset.vad_frame_seq_collate_fn
        else:
            dataset = audio_to_label_dataset.get_classification_label_dataset(
                featurizer=featurizer, config=OmegaConf.to_container(config)
            )
            batch_size = config['batch_size']
            collate_func = dataset.collate_fn

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        collate_fn=collate_func,
        drop_last=config.get('drop_last', False),
        shuffle=shuffle,
        num_workers=config.get('num_workers', 0),
        pin_memory=config.get('pin_memory', False),
    )