def create_data(config, data_path): dataset = LJSpeech(data_path) train_dataset = SliceDataset(dataset, config["valid_size"], len(dataset)) train_collator = DataCollector(config["p_pronunciation"]) train_sampler = RandomSampler(train_dataset) train_cargo = DataCargo(train_dataset, train_collator, batch_size=config["batch_size"], sampler=train_sampler) train_loader = DataLoader\ .from_generator(capacity=10, return_list=True)\ .set_batch_generator(train_cargo) valid_dataset = SliceDataset(dataset, 0, config["valid_size"]) valid_collector = DataCollector(1.) valid_sampler = SequentialSampler(valid_dataset) valid_cargo = DataCargo(valid_dataset, valid_collector, batch_size=1, sampler=valid_sampler) valid_loader = DataLoader\ .from_generator(capacity=2, return_list=True)\ .set_batch_generator(valid_cargo) return train_loader, valid_loader
filter_size = model_config["filter_size"] context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)]) print("context size is {} samples".format(context_size)) train_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds) valid_batch_fn = DataCollector(context_size, sample_rate, hop_length, train_clip_seconds, valid=True) batch_size = data_config["batch_size"] train_cargo = DataCargo(ljspeech_train, train_batch_fn, batch_size, sampler=RandomSampler(ljspeech_train)) # only batch=1 for validation is enabled valid_cargo = DataCargo(ljspeech_valid, valid_batch_fn, batch_size=1, sampler=SequentialSampler(ljspeech_valid)) if not os.path.exists(args.output): os.makedirs(args.output) if args.device == -1: place = fluid.CPUPlace() else: place = fluid.CUDAPlace(args.device)