Exemple #1
0
def test_synthesize():
    """
    End-to-end synthesis test.

    Loads the Tacotron2 and WaveGlow checkpoints from the ``files`` folder,
    synthesizes a fixed sentence to an audio file and an alignment graph,
    then checks that both files were written and that the transcription of
    the audio is more than 0.5 similar to the input text.

    The generated files are removed in a ``finally`` clause so a failing
    assertion does not leave artefacts behind for later test runs.
    """
    model_path = os.path.join("files", "tacotron2_statedict.pt")
    waveglow_path = os.path.join("files",
                                 "waveglow_256channels_universal_v5.pt")
    graph_path = "graph.png"
    audio_path = "synthesized_audio.wav"

    model = load_model(model_path)
    assert model

    waveglow = load_waveglow_model(waveglow_path)
    assert waveglow

    text = "hello everybody my name is david attenborough"
    inflect_engine = inflect.engine()
    try:
        synthesize(model,
                   waveglow,
                   text,
                   inflect_engine,
                   graph=graph_path,
                   audio=audio_path)

        # Check the outputs exist before transcribing, so a missing file is
        # reported as such rather than as a transcription error.
        assert os.path.isfile(graph_path)
        assert os.path.isfile(audio_path)
        assert text_similarity(text, transcribe(audio_path)) > 0.5
    finally:
        # Always clean up whatever was produced, even on failure.
        for output_path in (graph_path, audio_path):
            if os.path.isfile(output_path):
                os.remove(output_path)
Exemple #2
0
def _upload_basename(filename):
    """Return only the final path component of a client-supplied filename."""
    # Normalise Windows separators first so "..\\x" is handled like "../x".
    return os.path.basename(filename.replace("\\", "/"))


def synthesis_setup_post():
    """
    Handle the synthesis setup form.

    Reads the chosen vocoder type from the form, saves any uploaded vocoder
    files (or resolves an existing one), loads the vocoder, then loads the
    latest checkpoint for the selected dataset. The results are stored in
    the module-level ``model``, ``vocoder`` and ``vocoder_type`` globals.

    Returns
    -------
    The rendered ``synthesis-setup.html`` page with an ``error`` message if
    no vocoder model was supplied or the vocoder type is not recognised,
    otherwise a redirect to ``/synthesis``.

    Raises
    ------
    FileExistsError
        If a hifigan upload maps to a folder that already exists
        (``os.makedirs`` is called with ``exist_ok=False``).
    """
    global model, vocoder, vocoder_type

    vocoder_type = request.form["vocoder"]
    if vocoder_type == "hifigan":
        hifigan_model = request.files.get("hifigan-model")
        hifigan_config = request.files.get("hifigan-config")
        if hifigan_model and hifigan_config:
            # Upload filenames are client-controlled: strip any directory
            # components so they cannot escape paths["hifigan"].
            model_name = _upload_basename(hifigan_model.filename).split(".")[0]
            model_config = _upload_basename(
                hifigan_config.filename).split(".")[0]
            hifigan_folder = os.path.join(paths["hifigan"],
                                          model_name + "-" + model_config)
            # NOTE(review): exist_ok=False makes a repeated upload of the
            # same names raise; presumably deliberate to avoid overwriting.
            os.makedirs(hifigan_folder, exist_ok=False)
            model_path = os.path.join(hifigan_folder, "model.pt")
            model_config_path = os.path.join(hifigan_folder, "config.json")
            hifigan_model.save(model_path)
            hifigan_config.save(model_config_path)
        elif request.form.get("existing_hifigan"):
            # NOTE(review): this form value is also client-supplied and is
            # joined unsanitised; confirm it is validated upstream.
            hifigan_folder = os.path.join(paths["hifigan"],
                                          request.form["existing_hifigan"])
            model_path = os.path.join(hifigan_folder, "model.pt")
            model_config_path = os.path.join(hifigan_folder, "config.json")
        else:
            return render_template("synthesis-setup.html",
                                   error="No hifigan model chosen")

        vocoder = load_hifigan_model(model_path, model_config_path)
    elif vocoder_type == "waveglow":
        waveglow_upload = request.files.get("waveglow")
        if waveglow_upload:
            # Same sanitisation as above for the uploaded waveglow file.
            model_path = os.path.join(
                paths["waveglow"], _upload_basename(waveglow_upload.filename))
            waveglow_upload.save(model_path)
        elif request.form.get("existing_waveglow"):
            model_path = os.path.join(paths["waveglow"],
                                      request.form["existing_waveglow"])
        else:
            return render_template("synthesis-setup.html",
                                   error="No waveglow model chosen")

        vocoder = load_waveglow_model(model_path)
    else:
        return render_template("synthesis-setup.html",
                               error="Invalid vocoder selected")

    dataset_name = request.form["path"]
    checkpoint_folder = os.path.join(paths["models"], dataset_name)
    checkpoint = get_latest_checkpoint(checkpoint_folder)
    model = load_model(checkpoint)
    return redirect("/synthesis")
Exemple #3
0
def synthesis_setup_post():
    """
    Handle the synthesis setup form (waveglow vocoder only).

    Saves an uploaded waveglow model, or resolves an existing one by name,
    then loads the latest checkpoint for the selected dataset together with
    the waveglow model into the module-level ``model`` and
    ``waveglow_model`` globals.

    Returns
    -------
    The rendered ``synthesis-setup.html`` page with an ``error`` message if
    no waveglow model was supplied, otherwise a redirect to ``/synthesis``.
    """
    global model, waveglow_model

    waveglow_upload = request.files.get("waveglow")
    if waveglow_upload:
        # The upload filename is client-controlled: keep only its final path
        # component (treating "\\" like "/") so it cannot escape
        # paths["waveglow"] or replace it with an absolute path.
        waveglow_filename = os.path.basename(
            waveglow_upload.filename.replace("\\", "/"))
        waveglow_path = os.path.join(paths["waveglow"], waveglow_filename)
        waveglow_upload.save(waveglow_path)
    elif request.form.get("existing_waveglow"):
        # NOTE(review): this form value is also client-supplied and is joined
        # unsanitised; confirm it is validated upstream.
        waveglow_path = os.path.join(paths["waveglow"],
                                     request.form["existing_waveglow"])
    else:
        return render_template("synthesis-setup.html",
                               path=None,
                               error="No waveglow model chosen")

    dataset_name = request.form["path"]
    checkpoint_folder = os.path.join(paths["models"], dataset_name)
    checkpoint = get_latest_checkpoint(checkpoint_folder)
    model = load_model(checkpoint)
    waveglow_model = load_waveglow(waveglow_path)

    return redirect("/synthesis")
def synthesis_setup_post():
    """
    Handle the synthesis setup form (hifigan vocoder with language symbols).

    Looks up the symbol set for the chosen language, loads the selected
    checkpoint of the selected model, and builds a ``Hifigan`` vocoder from
    either a custom training checkpoint (form value ``custom-<iteration>``)
    or a named folder under ``paths["hifigan"]``. Results are stored in the
    module-level ``model``, ``vocoder`` and ``symbols`` globals.

    Returns
    -------
    A redirect to ``/synthesis``.
    """
    global model, vocoder, symbols

    form = request.form
    dataset_name = form["model"]
    language = form["language"]
    symbols = get_symbols(language)

    # <models>/<dataset>/<checkpoint file>
    checkpoint = os.path.join(paths["models"], dataset_name,
                              form["checkpoint"])
    model = load_model(checkpoint)

    vocoder_choice = form["vocoder"]
    if not vocoder_choice.startswith("custom-"):
        # Named vocoder folder holding a model/config pair.
        hifigan_folder = os.path.join(paths["hifigan"], vocoder_choice)
        model_path = os.path.join(hifigan_folder, "model.pt")
        model_config_path = os.path.join(hifigan_folder, "config.json")
    else:
        # "custom-<iteration>": generator checkpoint from hifigan training
        # for this dataset, paired with the shared config file.
        checkpoint_iteration = vocoder_choice.split("-")[1]
        model_path = os.path.join(paths["hifigan_training"], dataset_name,
                                  f"g_{checkpoint_iteration}")
        model_config_path = CONFIG_FILE

    vocoder = Hifigan(model_path, model_config_path)
    return redirect("/synthesis")
def test_load_model():
    """Loading the sample checkpoint must produce a ``Tacotron2`` instance."""
    loaded = load_model(os.path.join("test_samples", "model.pt"))
    assert isinstance(loaded, Tacotron2)
def train(
    audio_directory,
    output_directory,
    metadata_path=None,
    trainlist_path=None,
    vallist_path=None,
    symbols=DEFAULT_ALPHABET,
    checkpoint_path=None,
    transfer_learning_path=None,
    epochs=8000,
    batch_size=None,
    early_stopping=True,
    multi_gpu=True,
    iters_per_checkpoint=1000,
    iters_per_backup_checkpoint=10000,
    train_size=0.8,
    alignment_sentence="",
    logging=logging,
):
    """
    Trains the Tacotron2 model.

    Parameters
    ----------
    audio_directory : str
        Path to dataset clips
    output_directory : str
        Path to save checkpoints to
    metadata_path : str (optional)
        Path to label file
    trainlist_path : str (optional)
        Path to trainlist file
    vallist_path : str (optional)
        Path to vallist file
    symbols : list (optional)
        Valid symbols (default is English)
    checkpoint_path : str (optional)
        Path to a checkpoint to load (default is None)
    transfer_learning_path : str (optional)
        Path to a transfer learning checkpoint to use (default is None).
        Ignored when checkpoint_path is given.
    epochs : int (optional)
        Number of epochs to run training for (default is 8000)
    batch_size : int (optional)
        Training batch size (calculated automatically if None)
    early_stopping : bool (optional)
        Whether to stop training when loss stops significantly decreasing (default is True)
    multi_gpu : bool (optional)
        Use multiple GPUs in parallel if available (default is True)
    iters_per_checkpoint : int (optional)
        How often temporary checkpoints are saved (number of iterations)
    iters_per_backup_checkpoint : int (optional)
        How often backup checkpoints are saved (number of iterations)
    train_size : float (optional)
        Percentage of samples to use for training (default is 80%/0.8).
        Only used when splitting a metadata file.
    alignment_sentence : str (optional)
        Sentence for alignment graph to analyse performance
    logging : logging (optional)
        Logging object to write logs to

    Raises
    ------
    AssertionError
        If neither a metadata file nor both trainlist/vallist files are given,
        if CUDA is not available or there is not enough GPU memory
    RuntimeError
        If the batch size is too high (causing CUDA out of memory)
    """
    assert metadata_path or (
        trainlist_path and vallist_path
    ), "You must give the path to your metadata file or trainlist/vallist files"
    assert torch.cuda.is_available(
    ), "You do not have Torch with CUDA installed. Please check CUDA & Pytorch install"
    os.makedirs(output_directory, exist_ok=True)

    available_memory_gb = get_available_memory()
    assert (
        available_memory_gb >= MINIMUM_MEMORY_GB
    ), f"Required GPU with at least {MINIMUM_MEMORY_GB}GB memory. (only {available_memory_gb}GB available)"

    if not batch_size:
        batch_size = get_batch_size(available_memory_gb)

    learning_rate = get_learning_rate(batch_size)
    logging.info(
        f"Setting batch size to {batch_size}, learning rate to {learning_rate}. ({available_memory_gb}GB GPU memory free)"
    )

    # Set seed
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    random.seed(SEED)

    # Setup GPU
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False

    # Load model & optimizer
    logging.info("Loading model...")
    model = Tacotron2().cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=WEIGHT_DECAY)
    criterion = Tacotron2Loss()
    logging.info("Loaded model")

    # Load data
    logging.info("Loading data...")
    if metadata_path:
        # metadata.csv
        filepaths_and_text = load_labels_file(metadata_path)
        random.shuffle(filepaths_and_text)
        train_files, test_files = train_test_split(filepaths_and_text,
                                                   train_size)
    else:
        # trainlist.txt & vallist.txt
        train_files = load_labels_file(trainlist_path)
        test_files = load_labels_file(vallist_path)
        filepaths_and_text = train_files + test_files

    validate_dataset(filepaths_and_text, audio_directory, symbols)
    trainset = VoiceDataset(train_files, audio_directory, symbols)
    valset = VoiceDataset(test_files, audio_directory, symbols)
    collate_fn = TextMelCollate()

    # Data loaders
    train_loader = DataLoader(trainset,
                              num_workers=0,
                              sampler=None,
                              batch_size=batch_size,
                              pin_memory=False,
                              collate_fn=collate_fn)
    val_loader = DataLoader(valset,
                            num_workers=0,
                            sampler=None,
                            batch_size=batch_size,
                            pin_memory=False,
                            collate_fn=collate_fn)
    logging.info("Loaded data")

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0

    if checkpoint_path:
        if transfer_learning_path:
            logging.info(
                "Ignoring transfer learning as checkpoint already exists")
        model, optimizer, iteration, epoch_offset = load_checkpoint(
            checkpoint_path, model, optimizer, train_loader)
        # Resume on the iteration after the one the checkpoint was saved at
        iteration += 1
        logging.info("Loaded checkpoint '{}' from iteration {}".format(
            checkpoint_path, iteration))
    elif transfer_learning_path:
        model = warm_start_model(transfer_learning_path, model, symbols)
        logging.info("Loaded transfer learning model '{}'".format(
            transfer_learning_path))
    else:
        logging.info("Generating first checkpoint...")

    # Enable Multi GPU
    if multi_gpu and torch.cuda.device_count() > 1:
        logging.info(f"Using {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)

    # Alignment sentence
    alignment_sequence = None
    alignment_folder = None
    if alignment_sentence:
        alignment_sequence = text_to_sequence(
            clean_text(alignment_sentence.strip(), symbols), symbols)
        alignment_folder = os.path.join(TRAINING_PATH,
                                        Path(output_directory).stem)
        os.makedirs(alignment_folder, exist_ok=True)

    model.train()
    validation_losses = []
    for epoch in range(epoch_offset, epochs):
        logging.info(f"Progress - {epoch}/{epochs}")
        for batch in train_loader:
            start = time.perf_counter()
            # Force the configured learning rate (this also overrides any
            # value restored with the optimizer state from a checkpoint)
            for param_group in optimizer.param_groups:
                param_group["lr"] = learning_rate

            # Backpropagation
            model.zero_grad()
            y, y_pred = process_batch(batch, model)

            loss = criterion(y_pred, y)
            avgmax_attention = calc_avgmax_attention(batch[-1], batch[1],
                                                     y_pred[-1])
            reduced_loss = loss.item()
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           GRAD_CLIP_THRESH)
            optimizer.step()

            duration = time.perf_counter() - start
            logging.info(
                "Status - [Epoch {}: Iteration {}] Train loss {:.5f} Attention score {:.5f} {:.2f}s/it"
                .format(epoch, iteration, reduced_loss, avgmax_attention,
                        duration))

            # Validate & save checkpoint
            if iteration % iters_per_checkpoint == 0:
                logging.info("Validating model")
                val_loss, avgmax_attention = validate(model, val_loader,
                                                      criterion, iteration)
                validation_losses.append(val_loss)
                logging.info(
                    "Saving model and optimizer state at iteration {} to {}. Validation score = {:.5f}, Attention score = {:.5f}"
                    .format(iteration, output_directory, val_loss,
                            avgmax_attention))
                checkpoint_path = save_checkpoint(
                    model,
                    optimizer,
                    learning_rate,
                    iteration,
                    symbols,
                    epoch,
                    output_directory,
                    iters_per_checkpoint,
                    iters_per_backup_checkpoint,
                )
                if alignment_sequence is not None:
                    # Best-effort: a failure to draw the alignment graph must
                    # not interrupt training
                    try:
                        _, _, _, alignment = load_model(
                            checkpoint_path).inference(alignment_sequence)
                        graph_path = os.path.join(
                            alignment_folder,
                            "checkpoint_{}.png".format(iteration))
                        generate_graph(alignment,
                                       graph_path,
                                       heading=f"Iteration {iteration}")
                        graph = os.path.relpath(graph_path).replace("\\", "/")
                        logging.info(f"Alignment - {iteration}, {graph}")
                    except Exception:
                        logging.info(
                            "Failed to generate alignment sample, you may need to train for longer before this is possible"
                        )

            iteration += 1

        # Early Stopping
        if early_stopping and check_early_stopping(validation_losses):
            logging.info(
                "Stopping training early as loss is no longer decreasing")
            break

    logging.info(f"Progress - {epochs}/{epochs}")
    validate(model, val_loader, criterion, iteration)
    # Keep the returned path so the log line below reports where the final
    # checkpoint was written rather than an earlier (or missing) path
    checkpoint_path = save_checkpoint(
        model,
        optimizer,
        learning_rate,
        iteration,
        symbols,
        epochs,
        output_directory,
        iters_per_checkpoint,
        iters_per_backup_checkpoint,
    )
    logging.info(
        "Saving model and optimizer state at iteration {} to {}".format(
            iteration, checkpoint_path))