Ejemplo n.º 1
0
    # Prepare sampler
    frame_lengths = Mel.file_data_source.frame_lengths
    sampler = PartialyRandomizedSimilarTimeLengthSampler(
        frame_lengths, batch_size=hparams.batch_size)

    # Dataset and Dataloader setup
    dataset = PyTorchDataset(X, Mel, Y)
    data_loader_dv3 = data_utils.DataLoader(
        dataset, batch_size=hparams.batch_size,
        num_workers=hparams.num_workers, sampler=sampler,
        collate_fn=collate_fn, pin_memory=hparams.pin_memory)
    print("dataloader for dv3 prepared")

    dv3.train._frontend = getattr(frontend, hparams.frontend)
    dv3_model = build_deepvoice_3(dv3_preset , checkpoint_dv3)
    print("Built dv3!")

    if use_cuda:
        dv3_model = dv3_model.cuda()

    dv3_optimizer = optim.Adam((dv3_model.get_trainable_parameters(),
                           lr=hparams.initial_learning_rate, betas=(
        hparams.adam_beta1, hparams.adam_beta2),
        eps=hparams.adam_eps, weight_decay=hparams.weight_decay)

    log_event_path = "log/run-test" + str(datetime.now()).replace(" ", "_")
    print("Log event path for dv3: {}".format(log_event_path))
    writer_dv3 = SummaryWriter(log_dir=log_event_path)

    # ENCODER
            #if i%8000=0:
            #scheduler.step()


def download_file(file_name=None):
    """Pass ``file_name`` to ``google.colab.files.download``.

    ``google.colab`` is imported inside the function body, so the import is
    attempted only when this function is called, not when the module loads.

    Args:
        file_name: Value forwarded unchanged to ``files.download``. The
            default ``None`` is forwarded as-is; no validation happens here.
    """
    from google.colab import files as colab_files

    colab_files.download(file_name)


# Module-level batch size. It is not referenced within the script body shown
# below; presumably consumed elsewhere in the file -- TODO confirm.
batch_size = 64

if __name__ == "__main__":

    # Script entry point. The steps below are order-dependent:
    #   1. build the Deep Voice 3 model,
    #   2. fetch the cloned voices,
    #   3. derive speaker embeddings from the model,
    #   4. build the encoder,
    #   5. wrap voices + embeddings in a Speech_Dataset.

    # Load Deep Voice 3.
    # NOTE(review): `True` is passed positionally; the original comment says
    # "Pre Trained Model", so it presumably selects pre-trained weights --
    # confirm against the signature of build_deepvoice_3.
    dv3_model = build_deepvoice_3(True)

    all_speakers = get_cloned_voices()
    print("Cloning Texts are produced")

    # The embeddings are obtained from the Deep Voice 3 model built above.
    speaker_embed = get_speaker_embeddings(dv3_model)
    # Build the encoder; it takes no arguments here.
    encoder = build_encoder()
    print("Encoder is built!")

    # Dataset built from the cloned voices and the speaker embeddings.
    speech_data = Speech_Dataset(all_speakers, speaker_embed)

    # Disabled debugging aid: prints the shapes and values of the first two
    # items of the first five dataset samples.
    #for i in range(5):
    #    sample = speech_data[i]
    #    print(sample[0].shape, sample[1].shape)
    #    print(sample[0], sample[1])