def val_dataloader(self):
    """Build the validation dataloader.

    Loads the fixed evaluation manifests via ``build_raw_data_loader`` with
    speed perturbation disabled (evaluation data must never be augmented)
    and utterances capped at ``max_duration=10`` seconds.

    Returns:
        The dataloader object produced by ``build_raw_data_loader``.
    """
    # Evaluation manifest CSVs. Paths are relative to the working directory.
    # NOTE: 'filterd_manifest' is the actual on-disk directory name.
    manifest_paths = [
        'data/manifest/libri_test_short.csv',
        'data/manifest/ce_test.csv',
        'data/filterd_manifest/c_500_test.csv',
    ]
    # NOTE(review): reuses the *train* batch-size / worker-count hparams for
    # validation — presumably no dedicated val_* hparams exist; confirm.
    return build_raw_data_loader(
        manifest_paths,
        vocab_path=self.hparams.vocab_path,
        batch_size=self.hparams.train_batch_size,
        num_workers=self.hparams.train_loader_num_workers,
        speed_perturb=False,  # no augmentation on eval data
        max_duration=10)      # drop utterances longer than 10 s
def train_dataloader(self):
    """Build the training dataloader.

    Loads the active training manifests via ``build_raw_data_loader`` with
    speed perturbation enabled as on-the-fly augmentation and utterances
    capped at ``max_duration=10`` seconds.

    Returns:
        The dataloader object produced by ``build_raw_data_loader``.
    """
    # Training manifest CSVs. Paths are relative to the working directory.
    # NOTE: 'filterd_manifest' is the actual on-disk directory name.
    manifest_paths = [
        'data/filterd_manifest/ce_200.csv',
        'data/manifest/libri_train_short.csv',
        'data/filterd_manifest/c_500_train.csv',
    ]
    return build_raw_data_loader(
        manifest_paths,
        vocab_path=self.hparams.vocab_path,
        batch_size=self.hparams.train_batch_size,
        num_workers=self.hparams.train_loader_num_workers,
        speed_perturb=True,  # speed-perturbation augmentation on train data
        max_duration=10)     # drop utterances longer than 10 s