def test_synthesize(): model_path = os.path.join("files", "tacotron2_statedict.pt") waveglow_path = os.path.join("files", "waveglow_256channels_universal_v5.pt") graph_path = "graph.png" audio_path = "synthesized_audio.wav" model = load_model(model_path) assert model waveglow = load_waveglow_model(waveglow_path) assert waveglow text = "hello everybody my name is david attenborough" inflect_engine = inflect.engine() synthesize(model, waveglow, text, inflect_engine, graph=graph_path, audio=audio_path) assert text_similarity(text, transcribe(audio_path)) > 0.5 assert os.path.isfile(graph_path) assert os.path.isfile(audio_path) os.remove(graph_path) os.remove(audio_path)
def synthesis_setup_post(): global model, vocoder, vocoder_type vocoder_type = request.form["vocoder"] if vocoder_type == "hifigan": if request.files.get("hifigan-model") and request.files.get( "hifigan-config"): model_name = request.files["hifigan-model"].filename.split(".")[0] model_config = request.files["hifigan-config"].filename.split( ".")[0] hifigan_folder = os.path.join(paths["hifigan"], model_name + "-" + model_config) os.makedirs(hifigan_folder, exist_ok=False) model_path = os.path.join(hifigan_folder, "model.pt") model_config_path = os.path.join(hifigan_folder, "config.json") request.files["hifigan-model"].save(model_path) request.files["hifigan-config"].save(model_config_path) elif request.form.get("existing_hifigan"): hifigan_folder = os.path.join(paths["hifigan"], request.form["existing_hifigan"]) model_path = os.path.join(hifigan_folder, "model.pt") model_config_path = os.path.join(hifigan_folder, "config.json") else: return render_template("synthesis-setup.html", error="No hifigan model chosen") vocoder = load_hifigan_model(model_path, model_config_path) elif vocoder_type == "waveglow": if request.files.get("waveglow"): model_path = os.path.join(paths["waveglow"], request.files["waveglow"].filename) request.files["waveglow"].save(model_path) elif request.form.get("existing_waveglow"): model_path = os.path.join(paths["waveglow"], request.form["existing_waveglow"]) else: return render_template("synthesis-setup.html", error="No waveglow model chosen") vocoder = load_waveglow_model(model_path) else: return render_template("synthesis-setup.html", error="Invalid vocoder selected") dataset_name = request.form["path"] checkpoint_folder = os.path.join(paths["models"], dataset_name) checkpoint = get_latest_checkpoint(checkpoint_folder) model = load_model(checkpoint) return redirect("/synthesis")
def synthesis_setup_post(): global model, waveglow_model if request.files.get("waveglow"): waveglow_path = os.path.join(paths["waveglow"], request.files["waveglow"].filename) request.files["waveglow"].save(waveglow_path) elif request.form.get("existing_waveglow"): waveglow_path = os.path.join(paths["waveglow"], request.form["existing_waveglow"]) else: return render_template("synthesis-setup.html", path=None, error="No waveglow model chosen") dataset_name = request.form["path"] checkpoint_folder = os.path.join(paths["models"], dataset_name) checkpoint = get_latest_checkpoint(checkpoint_folder) model = load_model(checkpoint) waveglow_model = load_waveglow(waveglow_path) return redirect("/synthesis")
def synthesis_setup_post(): global model, vocoder, symbols dataset_name = request.form["model"] language = request.form["language"] symbols = get_symbols(language) checkpoint_folder = os.path.join(paths["models"], dataset_name) checkpoint = os.path.join(checkpoint_folder, request.form["checkpoint"]) model = load_model(checkpoint) if request.form["vocoder"].startswith("custom-"): checkpoint_iteration = request.form["vocoder"].split("-")[1] model_path = os.path.join(paths["hifigan_training"], dataset_name, f"g_{checkpoint_iteration}") model_config_path = CONFIG_FILE else: hifigan_folder = os.path.join(paths["hifigan"], request.form["vocoder"]) model_path = os.path.join(hifigan_folder, "model.pt") model_config_path = os.path.join(hifigan_folder, "config.json") vocoder = Hifigan(model_path, model_config_path) return redirect("/synthesis")
def test_load_model(): model_path = os.path.join("test_samples", "model.pt") model = load_model(model_path) assert isinstance(model, Tacotron2)
def train( audio_directory, output_directory, metadata_path=None, trainlist_path=None, vallist_path=None, symbols=DEFAULT_ALPHABET, checkpoint_path=None, transfer_learning_path=None, epochs=8000, batch_size=None, early_stopping=True, multi_gpu=True, iters_per_checkpoint=1000, iters_per_backup_checkpoint=10000, train_size=0.8, alignment_sentence="", logging=logging, ): """ Trains the Tacotron2 model. Parameters ---------- audio_directory : str Path to dataset clips output_directory : str Path to save checkpoints to metadata_path : str (optional) Path to label file trainlist_path : str (optional) Path to trainlist file vallist_path : str (optional) Path to vallist file symbols : list (optional) Valid symbols (default is English) checkpoint_path : str (optional) Path to a checkpoint to load (default is None) transfer_learning_path : str (optional) Path to a transfer learning checkpoint to use (default is None) epochs : int (optional) Number of epochs to run training for (default is 8000) batch_size : int (optional) Training batch size (calculated automatically if None) early_stopping : bool (optional) Whether to stop training when loss stops significantly decreasing (default is True) multi_gpu : bool (optional) Use multiple GPU's in parallel if available (default is True) iters_per_checkpoint : int (optional) How often temporary checkpoints are saved (number of iterations) iters_per_backup_checkpoint : int (optional) How often backup checkpoints are saved (number of iterations) train_size : float (optional) Percentage of samples to use for training (default is 80%/0.8) alignment_sentence : str (optional) Sentence for alignment graph to analyse performance logging : logging (optional) Logging object to write logs to Raises ------- AssertionError If CUDA is not available or there is not enough GPU memory RuntimeError If the batch size is too high (causing CUDA out of memory) """ assert metadata_path or ( trainlist_path and vallist_path ), "You must give the path to your metadata file or trainlist/vallist files" assert torch.cuda.is_available( ), "You do not have Torch with CUDA installed. Please check CUDA & Pytorch install" os.makedirs(output_directory, exist_ok=True) available_memory_gb = get_available_memory() assert ( available_memory_gb >= MINIMUM_MEMORY_GB ), f"Required GPU with at least {MINIMUM_MEMORY_GB}GB memory. (only {available_memory_gb}GB available)" if not batch_size: batch_size = get_batch_size(available_memory_gb) learning_rate = get_learning_rate(batch_size) logging.info( f"Setting batch size to {batch_size}, learning rate to {learning_rate}. ({available_memory_gb}GB GPU memory free)" ) # Set seed torch.manual_seed(SEED) torch.cuda.manual_seed(SEED) random.seed(SEED) # Setup GPU torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = False # Load model & optimizer logging.info("Loading model...") model = Tacotron2().cuda() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=WEIGHT_DECAY) criterion = Tacotron2Loss() logging.info("Loaded model") # Load data logging.info("Loading data...") if metadata_path: # metadata.csv filepaths_and_text = load_labels_file(metadata_path) random.shuffle(filepaths_and_text) train_files, test_files = train_test_split(filepaths_and_text, train_size) else: # trainlist.txt & vallist.txt train_files = load_labels_file(trainlist_path) test_files = load_labels_file(vallist_path) filepaths_and_text = train_files + test_files validate_dataset(filepaths_and_text, audio_directory, symbols) trainset = VoiceDataset(train_files, audio_directory, symbols) valset = VoiceDataset(test_files, audio_directory, symbols) collate_fn = TextMelCollate() # Data loaders train_loader = DataLoader(trainset, num_workers=0, sampler=None, batch_size=batch_size, pin_memory=False, collate_fn=collate_fn) val_loader = DataLoader(valset, num_workers=0, sampler=None, batch_size=batch_size, pin_memory=False, collate_fn=collate_fn) logging.info("Loaded data") # Load checkpoint if one exists iteration = 0 epoch_offset = 0 if checkpoint_path: if transfer_learning_path: logging.info( "Ignoring transfer learning as checkpoint already exists") model, optimizer, iteration, epoch_offset = load_checkpoint( checkpoint_path, model, optimizer, train_loader) iteration += 1 logging.info("Loaded checkpoint '{}' from iteration {}".format( checkpoint_path, iteration)) elif transfer_learning_path: model = warm_start_model(transfer_learning_path, model, symbols) logging.info("Loaded transfer learning model '{}'".format( transfer_learning_path)) else: logging.info("Generating first checkpoint...") # Enable Multi GPU if multi_gpu and torch.cuda.device_count() > 1: logging.info(f"Using {torch.cuda.device_count()} GPUs") model = nn.DataParallel(model) # Alignment sentence alignment_sequence = None alignment_folder = None if alignment_sentence: alignment_sequence = text_to_sequence( clean_text(alignment_sentence.strip(), symbols), symbols) alignment_folder = os.path.join(TRAINING_PATH, Path(output_directory).stem) os.makedirs(alignment_folder, exist_ok=True) model.train() validation_losses = [] for epoch in range(epoch_offset, epochs): logging.info(f"Progress - {epoch}/{epochs}") for _, batch in enumerate(train_loader): start = time.perf_counter() for param_group in optimizer.param_groups: param_group["lr"] = learning_rate # Backpropogation model.zero_grad() y, y_pred = process_batch(batch, model) loss = criterion(y_pred, y) avgmax_attention = calc_avgmax_attention(batch[-1], batch[1], y_pred[-1]) reduced_loss = loss.item() loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP_THRESH) optimizer.step() duration = time.perf_counter() - start logging.info( "Status - [Epoch {}: Iteration {}] Train loss {:.5f} Attention score {:.5f} {:.2f}s/it" .format(epoch, iteration, reduced_loss, avgmax_attention, duration)) # Validate & save checkpoint if iteration % iters_per_checkpoint == 0: logging.info("Validating model") val_loss, avgmax_attention = validate(model, val_loader, criterion, iteration) validation_losses.append(val_loss) logging.info( "Saving model and optimizer state at iteration {} to {}. Validation score = {:.5f}, Attention score = {:.5f}" .format(iteration, output_directory, val_loss, avgmax_attention)) checkpoint_path = save_checkpoint( model, optimizer, learning_rate, iteration, symbols, epoch, output_directory, iters_per_checkpoint, iters_per_backup_checkpoint, ) if alignment_sequence is not None: try: _, _, _, alignment = load_model( checkpoint_path).inference(alignment_sequence) graph_path = os.path.join( alignment_folder, "checkpoint_{}.png".format(iteration)) generate_graph(alignment, graph_path, heading=f"Iteration {iteration}") graph = os.path.relpath(graph_path).replace("\\", "/") logging.info(f"Alignment - {iteration}, {graph}") except Exception: logging.info( "Failed to generate alignment sample, you may need to train for longer before this is possible" ) iteration += 1 # Early Stopping if early_stopping and check_early_stopping(validation_losses): logging.info( "Stopping training early as loss is no longer decreasing") break logging.info(f"Progress - {epochs}/{epochs}") validate(model, val_loader, criterion, iteration) save_checkpoint( model, optimizer, learning_rate, iteration, symbols, epochs, output_directory, iters_per_checkpoint, iters_per_backup_checkpoint, ) logging.info( "Saving model and optimizer state at iteration {} to {}".format( iteration, checkpoint_path))