def create_data(config, data_path):
    """Build train/valid DataLoaders over the LJSpeech dataset.

    The first ``config["valid_size"]`` examples form the validation split
    (sequential order, batch_size=1, collate probability fixed at 1.0);
    the remaining examples form the training split (random order,
    ``config["batch_size"]``).

    Args:
        config (dict): reads ``valid_size``, ``p_pronunciation`` and
            ``batch_size``.
        data_path (str): root directory of the LJSpeech dataset.

    Returns:
        tuple: ``(train_loader, valid_loader)``.
    """
    whole = LJSpeech(data_path)
    n_valid = config["valid_size"]

    # Training split: everything after the validation prefix.
    train_split = SliceDataset(whole, n_valid, len(whole))
    train_batches = DataCargo(
        train_split,
        DataCollector(config["p_pronunciation"]),
        batch_size=config["batch_size"],
        sampler=RandomSampler(train_split))
    train_loader = DataLoader.from_generator(
        capacity=10, return_list=True).set_batch_generator(train_batches)

    # Validation split: deterministic order, one example per batch.
    valid_split = SliceDataset(whole, 0, n_valid)
    valid_batches = DataCargo(
        valid_split,
        DataCollector(1.),
        batch_size=1,
        sampler=SequentialSampler(valid_split))
    valid_loader = DataLoader.from_generator(
        capacity=2, return_list=True).set_batch_generator(valid_batches)

    return train_loader, valid_loader
def __init__(self, config, nranks, rank):
    """Create the train/valid data loaders for one distributed trainer.

    Args:
        config: configuration object; reads ``use_gpu``, ``valid_size``
            and ``batch_size``.
        nranks (int): total number of distributed trainers.
        rank (int): rank of this trainer; also selects the CUDA device
            when ``config.use_gpu`` is set.

    Raises:
        ValueError: if ``config.batch_size`` is not divisible by ``nranks``.
    """
    place = fluid.CUDAPlace(rank) if config.use_gpu else fluid.CPUPlace()

    # Whole LJSpeech dataset.
    ds = Dataset(config)

    # Split into train and valid dataset: the first ``valid_size`` indices
    # are held out for validation; the training remainder is shuffled.
    indices = list(range(len(ds)))
    train_indices = indices[config.valid_size:]
    valid_indices = indices[:config.valid_size]
    random.shuffle(train_indices)

    # Train dataset, sharded across trainers.
    trainset = Subset(ds, train_indices, valid=False)
    sampler = DistributedSampler(len(trainset), nranks, rank)
    total_bs = config.batch_size
    # Validate explicitly rather than via ``assert``: asserts are stripped
    # when Python runs with -O, silently skipping this check.
    if total_bs % nranks != 0:
        raise ValueError(
            "batch_size ({}) must be divisible by the number of trainers ({})"
            .format(total_bs, nranks))
    train_sampler = BatchSampler(sampler, total_bs // nranks, drop_last=True)
    trainloader = DataCargo(trainset, batch_sampler=train_sampler)
    trainreader = fluid.io.PyReader(capacity=50, return_list=True)
    trainreader.decorate_batch_generator(trainloader, place)
    # ``iter(int, 1)`` never terminates (int() returns 0, never 1), so this
    # generator cycles over the reader's epochs indefinitely.
    self.trainloader = (data for _ in iter(int, 1) for data in trainreader())

    # Valid dataset.
    validset = Subset(ds, valid_indices, valid=True)
    # Currently only support batch_size = 1 for valid loader.
    validloader = DataCargo(validset, batch_size=1, shuffle=False)
    validreader = fluid.io.PyReader(capacity=20, return_list=True)
    validreader.decorate_batch_generator(validloader, place)
    self.validloader = validreader
def make_data_loader(data_root, config):
    """Create the LJSpeech training DataLoader described by ``config``.

    Args:
        data_root (str): root directory of the LJSpeech dataset.
        config (dict): configuration; reads the ``meta_data``, ``transform``,
            ``train`` and ``model`` sections.

    Returns:
        fluid.io.DataLoader: loader yielding collated training batches.
    """
    # Construct meta data, then drop examples whose text is too short.
    meta = LJSpeechMetaData(data_root)
    min_text_length = config["meta_data"]["min_text_length"]
    meta = FilterDataset(meta, lambda x: len(x[2]) >= min_text_length)

    # Transform meta data into model-ready examples and cache the
    # transformed results so the work is done once per example.
    c = config["transform"]
    transform = Transform(
        replace_pronunciation_prob=c["replace_pronunciation_prob"],
        sample_rate=c["sample_rate"],
        preemphasis=c["preemphasis"],
        n_fft=c["n_fft"],
        win_length=c["win_length"],
        hop_length=c["hop_length"],
        fmin=c["fmin"],
        fmax=c["fmax"],
        n_mels=c["n_mels"],
        min_level_db=c["min_level_db"],
        ref_level_db=c["ref_level_db"],
        max_norm=c["max_norm"],
        clip_norm=c["clip_norm"])
    ljspeech = CacheDataset(TransformDataset(meta, transform))

    # Use meta data's text length as a sort key for the sampler and shard
    # batches across distributed trainers.
    # NOTE(review): the original code also built a
    # PartialyRandomizedSimilarTimeLengthSampler here and immediately
    # overwrote it with the BucketSampler below; that dead assignment has
    # been removed.
    batch_size = config["train"]["batch_size"]
    text_lengths = [len(example[2]) for example in meta]
    env = dg.parallel.ParallelEnv()
    sampler = BucketSampler(
        text_lengths,
        batch_size,
        num_trainers=env.nranks,
        rank=env.local_rank)

    # Some model hyperparameters affect how batches are collated.
    model_config = config["model"]
    collector = DataCollector(
        downsample_factor=model_config["downsample_factor"],
        r=model_config["outputs_per_step"])
    ljspeech_loader = DataCargo(
        ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler)
    loader = fluid.io.DataLoader.from_generator(capacity=10, return_list=True)
    loader.set_batch_generator(
        ljspeech_loader, places=fluid.framework._current_expected_place())
    return loader
n_loop = model_config["n_loop"]
n_layer = model_config["n_layer"]
filter_size = model_config["filter_size"]
# Context window of the model in samples; presumably the receptive field of
# the stacked dilated convolutions (n_layer stacks, n_loop dilation levels
# each) plus the current sample — TODO confirm against the model definition.
# Generator expression instead of a throwaway list inside sum().
context_size = 1 + n_layer * sum(filter_size**i for i in range(n_loop))
print("context size is {} samples".format(context_size))

# Collators for training and validation; validation clips are handled
# differently (valid=True) but share the same context/window parameters.
train_batch_fn = DataCollector(context_size, sample_rate, hop_length,
                               train_clip_seconds)
valid_batch_fn = DataCollector(
    context_size, sample_rate, hop_length, train_clip_seconds, valid=True)

batch_size = data_config["batch_size"]
train_cargo = DataCargo(
    ljspeech_train,
    train_batch_fn,
    batch_size,
    sampler=RandomSampler(ljspeech_train))

# only batch=1 for validation is enabled
valid_cargo = DataCargo(
    ljspeech_valid,
    valid_batch_fn,
    batch_size=1,
    sampler=SequentialSampler(ljspeech_valid))

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(args.output, exist_ok=True)

# Device -1 selects the CPU; any other value is a CUDA device id.
if args.device == -1:
    place = fluid.CPUPlace()
else:
    place = fluid.CUDAPlace(args.device)
ljspeech = TransformDataset(meta, transform) # =========================dataiterator========================= # use meta data's text length as a sort key for the sampler train_config = config["train"] batch_size = train_config["batch_size"] text_lengths = [len(example[2]) for example in meta] sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths, batch_size) # some hyperparameters affect how we process data, so create a data collector! model_config = config["model"] downsample_factor = model_config["downsample_factor"] r = model_config["outputs_per_step"] collector = DataCollector(downsample_factor=downsample_factor, r=r) ljspeech_loader = DataCargo( ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler) # =========================model========================= if args.device == -1: place = fluid.CPUPlace() else: place = fluid.CUDAPlace(args.device) with dg.guard(place): # =========================model========================= n_speakers = model_config["n_speakers"] speaker_dim = model_config["speaker_embed_dim"] speaker_embed_std = model_config["speaker_embedding_weight_std"] n_vocab = en.n_vocab embed_dim = model_config["text_embed_dim"] linear_dim = 1 + n_fft // 2
parser.add_argument("--input", type=str, required=True,
                    help="data path of the original data")
args = parser.parse_args()

# Load the YAML configuration.
with open(args.config, 'rt') as f:
    config = yaml.safe_load(f)

# Echo the CLI arguments and the config entries we care about.
print("========= Command Line Arguments ========")
for name, value in vars(args).items():
    print("{}: {}".format(name, value))
print("=========== Configurations ==============")
for name in ("p_pronunciation", "batch_size"):
    print("{}: {}".format(name, config[name]))

# Build the dataset pipeline and drain it once to exercise the loader.
ljspeech = LJSpeech(args.input)
collate_fn = DataCollector(config["p_pronunciation"])
dg.enable_dygraph(fluid.CPUPlace())
sampler = PartialyRandomizedSimilarTimeLengthSampler(ljspeech.num_frames())
cargo = DataCargo(
    ljspeech,
    collate_fn,
    batch_size=config["batch_size"],
    sampler=sampler)
loader = DataLoader.from_generator(
    capacity=5, return_list=True).set_batch_generator(cargo)
for i, batch in tqdm.tqdm(enumerate(loader)):
    continue