Beispiel #1
0
def create_data(config, data_path):
    """Build a ``(train_loader, valid_loader)`` pair over LJSpeech.

    The first ``config["valid_size"]`` examples form the validation
    split; every remaining example goes to the training split.
    """
    corpus = LJSpeech(data_path)
    valid_size = config["valid_size"]

    # Training split: shuffled order, batched by config["batch_size"].
    train_split = SliceDataset(corpus, valid_size, len(corpus))
    train_cargo = DataCargo(
        train_split,
        DataCollector(config["p_pronunciation"]),
        batch_size=config["batch_size"],
        sampler=RandomSampler(train_split))
    train_loader = DataLoader.from_generator(
        capacity=10, return_list=True).set_batch_generator(train_cargo)

    # Validation split: deterministic order, batch size fixed to 1,
    # pronunciation replacement probability fixed to 1.
    valid_split = SliceDataset(corpus, 0, valid_size)
    valid_cargo = DataCargo(
        valid_split,
        DataCollector(1.),
        batch_size=1,
        sampler=SequentialSampler(valid_split))
    valid_loader = DataLoader.from_generator(
        capacity=2, return_list=True).set_batch_generator(valid_cargo)

    return train_loader, valid_loader
Beispiel #2
0
    def __init__(self, config, nranks, rank):
        """Set up distributed train/valid data pipelines for one trainer.

        Args:
            config: experiment configuration (``batch_size``,
                ``valid_size``, ``use_gpu``, ...).
            nranks: total number of trainers.
            rank: this trainer's index, also used as the CUDA device id.
        """
        place = fluid.CUDAPlace(rank) if config.use_gpu else fluid.CPUPlace()

        # Whole LJSpeech dataset.
        full_dataset = Dataset(config)

        # Split off the first valid_size examples for validation and
        # shuffle only the training indices.
        all_indices = list(range(len(full_dataset)))
        train_indices = all_indices[config.valid_size:]
        valid_indices = all_indices[:config.valid_size]
        random.shuffle(train_indices)

        # Training pipeline: shard examples across trainers, then batch
        # each shard; drop_last keeps every rank's batch count equal.
        trainset = Subset(full_dataset, train_indices, valid=False)
        total_bs = config.batch_size
        assert total_bs % nranks == 0
        train_sampler = BatchSampler(
            DistributedSampler(len(trainset), nranks, rank),
            total_bs // nranks,
            drop_last=True)
        trainloader = DataCargo(trainset, batch_sampler=train_sampler)

        trainreader = fluid.io.PyReader(capacity=50, return_list=True)
        trainreader.decorate_batch_generator(trainloader, place)

        def _endless_batches():
            # Restart the reader forever so training never exhausts it.
            while True:
                for batch in trainreader():
                    yield batch

        self.trainloader = _endless_batches()

        # Validation pipeline.
        validset = Subset(full_dataset, valid_indices, valid=True)
        # Currently only support batch_size = 1 for valid loader.
        validloader = DataCargo(validset, batch_size=1, shuffle=False)

        validreader = fluid.io.PyReader(capacity=20, return_list=True)
        validreader.decorate_batch_generator(validloader, place)
        self.validloader = validreader
Beispiel #3
0
def make_data_loader(data_root, config):
    """Build a paddle DataLoader over LJSpeech for (distributed) training.

    Args:
        data_root: root directory of the LJSpeech corpus.
        config: dict with "meta_data", "transform", "train" and "model"
            sections controlling filtering, feature extraction, batching
            and the data collator.

    Returns:
        A fluid.io.DataLoader yielding collated training batches.
    """
    # construct meta data
    meta = LJSpeechMetaData(data_root)

    # drop examples whose transcript (field 2) is too short
    min_text_length = config["meta_data"]["min_text_length"]
    meta = FilterDataset(meta, lambda x: len(x[2]) >= min_text_length)

    # transform meta data into model-ready features, cached in memory
    c = config["transform"]
    transform = Transform(
        replace_pronunciation_prob=c["replace_pronunciation_prob"],
        sample_rate=c["sample_rate"],
        preemphasis=c["preemphasis"],
        n_fft=c["n_fft"],
        win_length=c["win_length"],
        hop_length=c["hop_length"],
        fmin=c["fmin"],
        fmax=c["fmax"],
        n_mels=c["n_mels"],
        min_level_db=c["min_level_db"],
        ref_level_db=c["ref_level_db"],
        max_norm=c["max_norm"],
        clip_norm=c["clip_norm"])
    ljspeech = CacheDataset(TransformDataset(meta, transform))

    # use meta data's text length as a sort key for the sampler: bucket
    # examples of similar length, sharded across trainers
    batch_size = config["train"]["batch_size"]
    text_lengths = [len(example[2]) for example in meta]

    env = dg.parallel.ParallelEnv()
    num_trainers = env.nranks
    local_rank = env.local_rank
    # NOTE: a PartialyRandomizedSimilarTimeLengthSampler used to be built
    # here and immediately discarded; only the BucketSampler is used.
    sampler = BucketSampler(
        text_lengths, batch_size, num_trainers=num_trainers, rank=local_rank)

    # some model hyperparameters affect how we process data
    model_config = config["model"]
    collector = DataCollector(
        downsample_factor=model_config["downsample_factor"],
        r=model_config["outputs_per_step"])
    ljspeech_loader = DataCargo(
        ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler)
    loader = fluid.io.DataLoader.from_generator(capacity=10, return_list=True)
    loader.set_batch_generator(
        ljspeech_loader, places=fluid.framework._current_expected_place())
    return loader
Beispiel #4
0
    # Compute the model's receptive field from its dilation schedule.
    n_loop = model_config["n_loop"]
    n_layer = model_config["n_layer"]
    filter_size = model_config["filter_size"]
    # n_layer stacks of n_loop convolutions with dilation filter_size**i,
    # plus one for the current sample.
    context_size = 1 + n_layer * sum([filter_size**i for i in range(n_loop)])
    print("context size is {} samples".format(context_size))
    # Train/valid collators differ only by valid=True; presumably the
    # valid collator skips clipping to train_clip_seconds -- TODO confirm
    # in DataCollector.
    train_batch_fn = DataCollector(context_size, sample_rate, hop_length,
                                   train_clip_seconds)
    valid_batch_fn = DataCollector(context_size,
                                   sample_rate,
                                   hop_length,
                                   train_clip_seconds,
                                   valid=True)

    batch_size = data_config["batch_size"]
    # Training batches are drawn in random order.
    train_cargo = DataCargo(ljspeech_train,
                            train_batch_fn,
                            batch_size,
                            sampler=RandomSampler(ljspeech_train))

    # only batch=1 for validation is enabled
    valid_cargo = DataCargo(ljspeech_valid,
                            valid_batch_fn,
                            batch_size=1,
                            sampler=SequentialSampler(ljspeech_valid))

    # Ensure the output directory exists before anything is written to it.
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    # args.device == -1 selects CPU; otherwise it is a CUDA device index.
    if args.device == -1:
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(args.device)
Beispiel #5
0
    # Apply the feature transform lazily over the meta data.
    ljspeech = TransformDataset(meta, transform)

    # =========================dataiterator=========================
    # use meta data's text length as a sort key for the sampler
    train_config = config["train"]
    batch_size = train_config["batch_size"]
    # Field 2 of each meta example is the text -- its length drives the
    # length-similarity bucketing below.
    text_lengths = [len(example[2]) for example in meta]
    sampler = PartialyRandomizedSimilarTimeLengthSampler(text_lengths,
                                                         batch_size)

    # some hyperparameters affect how we process data, so create a data collector!
    model_config = config["model"]
    downsample_factor = model_config["downsample_factor"]
    r = model_config["outputs_per_step"]
    collector = DataCollector(downsample_factor=downsample_factor, r=r)
    ljspeech_loader = DataCargo(
        ljspeech, batch_fn=collector, batch_size=batch_size, sampler=sampler)

    # =========================model=========================
    # args.device == -1 selects CPU; otherwise it is a CUDA device index.
    if args.device == -1:
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(args.device)

    with dg.guard(place):
        # =========================model=========================
        n_speakers = model_config["n_speakers"]
        speaker_dim = model_config["speaker_embed_dim"]
        speaker_embed_std = model_config["speaker_embedding_weight_std"]
        n_vocab = en.n_vocab
        embed_dim = model_config["text_embed_dim"]
        # Number of linear-spectrogram bins for an n_fft-point FFT.
        linear_dim = 1 + n_fft // 2
Beispiel #6
0
    parser.add_argument("--input",
                        type=str,
                        required=True,
                        help="data path of the original data")
    args = parser.parse_args()
    # Load the YAML experiment configuration.
    with open(args.config, 'rt') as f:
        config = yaml.safe_load(f)

    # Echo arguments and the configuration keys used below.
    print("========= Command Line Arguments ========")
    for k, v in vars(args).items():
        print("{}: {}".format(k, v))
    print("=========== Configurations ==============")
    for k in ["p_pronunciation", "batch_size"]:
        print("{}: {}".format(k, config[k]))

    ljspeech = LJSpeech(args.input)
    collate_fn = DataCollector(config["p_pronunciation"])

    dg.enable_dygraph(fluid.CPUPlace())
    # NOTE(review): the sampler is fed frame counts here, while other
    # call sites pass text lengths -- confirm this is intended.
    sampler = PartialyRandomizedSimilarTimeLengthSampler(ljspeech.num_frames())
    cargo = DataCargo(ljspeech,
                      collate_fn,
                      batch_size=config["batch_size"],
                      sampler=sampler)
    loader = DataLoader\
           .from_generator(capacity=5, return_list=True)\
           .set_batch_generator(cargo)

    # Drain the loader once with a progress bar (pipeline smoke test).
    for i, batch in tqdm.tqdm(enumerate(loader)):
        continue