def create_or_update_weights_map_main(base_dir: str, prep_name: str, weights_prep_name: str, template_map: Optional[str] = None):
    """Create or update the weights map from `weights_prep_name`'s symbols onto `prep_name`'s.

    Loads both prepared directories, optionally seeds the map from a template
    file and/or a previously saved map, then persists the resulting map and
    symbol list under `prep_name`'s prepared directory.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name)
    assert os.path.isdir(prep_dir)
    orig_prep_dir = get_prepared_dir(base_dir, weights_prep_name)
    assert os.path.isdir(orig_prep_dir)

    logger = init_logger()
    add_console_out_to_logger(logger)
    logger.info(f"Creating/updating weights map for {weights_prep_name}...")

    # Optional template and previously saved map both default to None.
    loaded_template = SymbolsMap.load(template_map) if template_map is not None else None
    previous_map = (load_weights_map(prep_dir, weights_prep_name)
                    if weights_map_exists(prep_dir, weights_prep_name) else None)

    weights_map, symbols = create_or_update_weights_map(
        orig=load_prep_symbol_converter(orig_prep_dir).get_all_symbols(),
        dest=load_prep_symbol_converter(prep_dir).get_all_symbols(),
        existing_map=previous_map,
        template_map=loaded_template,
    )

    save_weights_map(prep_dir, weights_prep_name, weights_map)
    save_weights_symbols(prep_dir, weights_prep_name, symbols)
def create_or_update_inference_map_main(base_dir: str, prep_name: str, template_map: Optional[str] = None):
    """Create or update the inference symbols map for the prepared dataset `prep_name`.

    Optionally seeds the map from a template file and/or a previously saved
    inference map, then persists the new map and symbol list.
    """
    logger = init_logger()
    add_console_out_to_logger(logger)
    logger.info("Creating/updating inference map...")

    prep_dir = get_prepared_dir(base_dir, prep_name)
    assert os.path.isdir(prep_dir)
    all_symbols = get_all_symbols(prep_dir)

    # Optional template and previously saved map both default to None.
    loaded_template = SymbolsMap.load(template_map) if template_map is not None else None
    previous_map = load_infer_map(prep_dir) if infer_map_exists(prep_dir) else None

    infer_map, symbols = create_or_update_inference_map(
        orig=load_prep_symbol_converter(prep_dir).get_all_symbols(),
        dest=all_symbols,
        existing_map=previous_map,
        template_map=loaded_template,
    )

    save_infer_map(prep_dir, infer_map)
    save_infer_symbols(prep_dir, symbols)
def dl_pretrained(base_dir: str, train_name: str = DEFAULT_WAVEGLOW, prep_name: Optional[str] = None, version: int = 3):
    """Download a pretrained WaveGlow checkpoint from Nvidia and convert it in place.

    If `prep_name` is given, the whole filelist of that prepared dataset is
    saved as both test- and val-set of the training directory and the prepared
    name is recorded there.

    Fixes: typos in the user-facing messages ("Nvida" -> "Nvidia",
    "beeing" -> "being").
    """
    train_dir = get_train_dir(base_dir, train_name, create=True)
    assert os.path.isdir(train_dir)
    checkpoints_dir = get_checkpoints_dir(train_dir)
    dest_path = get_checkpoint_pretrained(checkpoints_dir)

    print("Downloading pretrained waveglow model from Nvidia...")
    dl_wg(
        destination=dest_path,
        version=version
    )

    print("Pretrained model is now being converted to be able to use it...")
    # Overwrites the downloaded file with the converted checkpoint.
    convert_glow(
        origin=dest_path,
        destination=dest_path,
        keep_orig=False
    )

    if prep_name is not None:
        prep_dir = get_prepared_dir(base_dir, prep_name)
        wholeset = load_filelist(prep_dir)
        save_testset(train_dir, wholeset)
        save_valset(train_dir, wholeset)
        # NOTE(review): the flattened source is ambiguous on whether this call
        # sits inside the `if`; saving `prep_name=None` would be meaningless,
        # so it is kept guarded — confirm against the original layout.
        save_prep_name(train_dir, prep_name=prep_name)
def convert_model(base_dir: str, prep_name: str, model_path: str, custom_hparams: Optional[Dict[str, str]]):
    """Convert a v1 model checkpoint at `model_path` to the v2 format.

    Speaker, accent and symbol mappings are taken from the prepared dataset
    `prep_name`.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name)
    speakers = load_prep_speakers_json(prep_dir)
    accents = load_prep_accents_ids(prep_dir)
    symbols = load_prep_symbol_converter(prep_dir)
    convert_v1_to_v2_model(
        old_model_path=model_path,
        custom_hparams=custom_hparams,
        speakers=speakers,
        accents=accents,
        symbols=symbols,
    )
def map_to_prep_symbols(base_dir: str, prep_name: str, text_name: str, ignore_arcs: bool = True):
    """Map the symbols of `text_name` onto the prepared dataset's symbol set
    using the stored inference map (which must already exist)."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    assert os.path.isdir(prep_dir)
    assert infer_map_exists(prep_dir)
    map_path = get_infer_map_path(prep_dir)
    map_text(base_dir, prep_name, text_name, map_path, ignore_arcs)
def validate_main(base_dir: str, train_name: str, waveglow: str = DEFAULT_WAVEGLOW, entry_id: Optional[int] = None, speaker: Optional[str] = None, ds: str = "val", custom_checkpoint: Optional[int] = None, sigma: float = DEFAULT_SIGMA, denoiser_strength: float = DEFAULT_DENOISER_STRENGTH, custom_tacotron_hparams: Optional[Dict[str, str]] = None, custom_waveglow_hparams: Optional[Dict[str, str]] = None):
    """Synthesize one validation/test entry with Tacotron + WaveGlow and save
    wavs/plots of the result next to the originals.

    `ds` selects the dataset ("val" or "test"); `entry_id`/`speaker` narrow the
    entry choice; `custom_checkpoint` selects a specific Tacotron iteration
    (latest otherwise). The latest WaveGlow checkpoint of `waveglow` is used.

    Raises:
        ValueError: if `ds` is neither "val" nor "test".
    """
    train_dir = get_train_dir(base_dir, train_name, create=False)
    assert os.path.isdir(train_dir)

    if ds == "val":
        data = load_valset(train_dir)
    elif ds == "test":
        data = load_testset(train_dir)
    else:
        # Bugfix: was `assert False`, which is stripped under `python -O` and
        # would let execution continue with `data` undefined. Raise explicitly,
        # matching the sibling `validate` CLI which raises here.
        raise ValueError(f"Unknown dataset '{ds}', expected 'val' or 'test'.")

    # Resolve an optional speaker name to its id via the prepared dataset.
    speaker_id: Optional[int] = None
    if speaker is not None:
        prep_name = load_prep_name(train_dir)
        prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
        speakers = load_prep_speakers_json(prep_dir)
        speaker_id = speakers.get_id(speaker)

    entry = data.get_for_validation(entry_id, speaker_id)
    checkpoint_path, iteration = get_custom_or_last_checkpoint(
        get_checkpoints_dir(train_dir), custom_checkpoint)
    val_dir = get_val_dir(train_dir, entry, iteration)
    logger = prepare_logger(get_val_log(val_dir))
    logger.info("Validating...")

    taco_checkpoint = CheckpointTacotron.load(checkpoint_path, logger)
    train_dir_wg = get_wg_train_dir(base_dir, waveglow, create=False)
    wg_checkpoint_path, _ = get_last_checkpoint(get_checkpoints_dir(train_dir_wg))
    wg_checkpoint = CheckpointWaveglow.load(wg_checkpoint_path, logger)

    result = validate(
        tacotron_checkpoint=taco_checkpoint,
        waveglow_checkpoint=wg_checkpoint,
        sigma=sigma,
        denoiser_strength=denoiser_strength,
        entry=entry,
        logger=logger,
        custom_taco_hparams=custom_tacotron_hparams,
        custom_wg_hparams=custom_waveglow_hparams
    )

    orig_mel = get_mel(entry.wav_path, custom_hparams=custom_waveglow_hparams)
    save_val_orig_wav(val_dir, entry.wav_path)
    save_val_orig_plot(val_dir, orig_mel)
    save_val_wav(val_dir, result.sampling_rate, result.wav)
    save_val_plot(val_dir, result.mel_outputs)
    # NOTE(review): `save_val_pre_postnet_plot` receives the *post*-net output
    # while `save_val_plot` gets the pre-postnet one — possibly swapped; kept
    # as in the original, confirm intent.
    save_val_pre_postnet_plot(val_dir, result.mel_outputs_postnet)
    save_val_alignments_sentence_plot(val_dir, result.alignments)
    save_val_comparison(val_dir)
    logger.info(f"Saved output to: {val_dir}")
def validate(base_dir: str, train_name: str, entry_id: Optional[int] = None, speaker: Optional[str] = None, ds: str = "val", custom_checkpoint: Optional[int] = None, sigma: float = 0.666, denoiser_strength: float = 0.00, custom_hparams: Optional[Dict[str, str]] = None):
    """Validate a WaveGlow checkpoint by re-synthesizing one validation/test
    entry's wav and saving the result, the original, and comparison plots.

    `ds` selects the dataset ("val" or "test"); `entry_id`/`speaker` narrow the
    entry choice; `custom_checkpoint` selects a specific iteration (latest
    otherwise).

    Raises:
        ValueError: if `ds` is neither "val" nor "test".
    """
    train_dir = get_train_dir(base_dir, train_name, create=False)
    assert os.path.isdir(train_dir)

    if ds == "val":
        data = load_valset(train_dir)
    elif ds == "test":
        data = load_testset(train_dir)
    else:
        # Fix: was a bare `raise Exception()` with no message — raise a
        # descriptive, specific error instead (still caught by `except Exception`).
        raise ValueError(f"Unknown dataset '{ds}', expected 'val' or 'test'.")

    # Resolve an optional speaker name to its id via the prepared dataset.
    speaker_id: Optional[int] = None
    if speaker is not None:
        prep_name = load_prep_name(train_dir)
        prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
        speakers = load_prep_speakers_json(prep_dir)
        speaker_id = speakers.get_id(speaker)

    entry = data.get_for_validation(entry_id, speaker_id)
    checkpoint_path, iteration = get_custom_or_last_checkpoint(
        get_checkpoints_dir(train_dir), custom_checkpoint)
    val_dir = get_val_dir(train_dir, entry, iteration)
    logger = prepare_logger(get_val_log(val_dir))
    logger.info(f"Validating {entry.wav_path}...")

    checkpoint = CheckpointWaveglow.load(checkpoint_path, logger)
    wav, wav_sr, wav_mel, orig_mel = infer(
        wav_path=entry.wav_path,
        denoiser_strength=denoiser_strength,
        sigma=sigma,
        checkpoint=checkpoint,
        custom_hparams=custom_hparams,
        logger=logger)

    save_val_wav(val_dir, wav_sr, wav)
    save_val_plot(val_dir, wav_mel)
    save_val_orig_wav(val_dir, entry.wav_path)
    save_val_orig_plot(val_dir, orig_mel)
    score = save_diff_plot(val_dir)
    save_v(val_dir)
    logger.info(f"Imagescore: {score*100}%")
    logger.info(f"Saved output to: {val_dir}")
def get_infer_sentences(base_dir: str, prep_name: str, text_name: str) -> InferSentenceList:
    """Load the sentences of `text_name` as an `InferSentenceList`, using the
    text's symbol converter and the prepared dataset's accent ids.

    Raises:
        ValueError: if the text directory does not exist.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        # Bugfix: was `print(...)` followed by `assert False` — the assert is
        # stripped under `python -O`, so execution would continue with a
        # missing directory. Raise with the same message instead.
        raise ValueError(f"The text '{text_name}' doesn't exist.")
    result = InferSentenceList.from_sentences(
        sentences=load_text_csv(text_dir),
        accents=load_prep_accents_ids(prep_dir),
        symbols=load_text_symbol_converter(text_dir))
    return result
def _accent_template(base_dir: str, prep_name: str, text_name: str):
    """Regenerate the accent-template CSV for `text_name` from its current
    sentences and the prepared dataset's accent ids."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return
    print("Updating accent template...")
    template = infer_accents_template(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        accent_ids=load_prep_accents_ids(prep_dir),
    )
    _save_accents_csv(text_dir, template)
def _check_for_unknown_symbols(base_dir: str, prep_name: str, text_name: str):
    """Log whether `text_name` contains symbols missing from the prepared
    dataset's symbol set (replacing unknown ones in the loaded sentences)."""
    sentences = get_infer_sentences(base_dir, prep_name, text_name)
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    logger = prepare_logger()
    has_unknown = sentences.replace_unknown_symbols(
        model_symbols=load_prep_symbol_converter(prep_dir),
        logger=logger,
    )
    if has_unknown:
        logger.info(
            "Some symbols are not in the prepared dataset symbolset. You need to create an inference map and then apply it to the symbols."
        )
    else:
        logger.info(
            "All symbols are in the prepared dataset symbolset. You can now synthesize this text."
        )
def accent_apply(base_dir: str, prep_name: str, text_name: str):
    """Apply the accents from the accents CSV to the sentences of `text_name`,
    print the result, save it, and re-check for unknown symbols."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return
    print("Applying accents...")
    accented = infer_accents_apply(
        sentences=load_text_csv(text_dir),
        accented_symbols=_load_accents_csv(text_dir),
        accent_ids=load_prep_accents_ids(prep_dir),
    )
    formatted = accented.get_formatted(
        symbol_id_dict=load_text_symbol_converter(text_dir),
        accent_id_dict=load_prep_accents_ids(prep_dir))
    print("\n" + formatted)
    _save_text_csv(text_dir, accented)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
def normalize_text(base_dir: str, prep_name: str, text_name: str):
    """Normalize the sentences of `text_name`, print and save the result, then
    refresh the accent template and unknown-symbol check."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return
    print("Normalizing text...")
    symbol_ids, normalized = infer_norm(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir))
    formatted = normalized.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_prep_accents_ids(prep_dir))
    print("\n" + formatted)
    _save_text_csv(text_dir, normalized)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
def add_text(base_dir: str, prep_name: str, text_name: str, filepath: str, lang: Language):
    """Register the file at `filepath` as a new text `text_name` for inference,
    then create the accent template and check for unknown symbols."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    if not os.path.isdir(prep_dir):
        print("Please prepare data first.")
        return
    print("Adding text...")
    symbol_ids, data = infer_add(
        text=read_text(filepath),
        lang=lang,
    )
    formatted = data.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_prep_accents_ids(prep_dir))
    print("\n" + formatted)
    text_dir = get_text_dir(prep_dir, text_name, create=True)
    _save_text_csv(text_dir, data)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
def start_new_training(base_dir: str, train_name: str, prep_name: str, test_size: float = 0.01, validation_size: float = 0.01, custom_hparams: Optional[Dict[str, str]] = None, split_seed: int = 1234, warm_start_train_name: Optional[str] = None, warm_start_checkpoint: Optional[int] = None):
    """Split the prepared dataset `prep_name` into train/test/val, persist the
    splits, optionally warm-start from another run, and launch training."""
    prep_dir = get_prepared_dir(base_dir, prep_name)
    whole = load_filelist(prep_dir)
    trainset, testset, valset = split_prepared_data_train_test_val(
        whole,
        test_size=test_size,
        validation_size=validation_size,
        seed=split_seed,
        shuffle=True)

    train_dir = get_train_dir(base_dir, train_name, create=True)
    save_trainset(train_dir, trainset)
    save_testset(train_dir, testset)
    save_valset(train_dir, valset)

    logs_dir = get_train_logs_dir(train_dir)
    logger = prepare_logger(get_train_log_file(logs_dir), reset=True)

    # Warm-start model is None when no warm-start run/checkpoint is given.
    warm_model = try_load_checkpoint(
        base_dir=base_dir,
        train_name=warm_start_train_name,
        checkpoint=warm_start_checkpoint,
        logger=logger)

    save_prep_name(train_dir, prep_name)

    train(
        custom_hparams=custom_hparams,
        logdir=logs_dir,
        trainset=trainset,
        valset=valset,
        save_checkpoint_dir=get_checkpoints_dir(train_dir),
        debug_logger=logger,
        warm_model=warm_model,
    )
def map_text(base_dir: str, prep_name: str, text_name: str, symbols_map_path: str, ignore_arcs: bool = True):
    """Apply the symbols map at `symbols_map_path` to the sentences of
    `text_name`, save the result, and refresh derived artifacts."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return
    symbol_ids, mapped = sents_map(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        symbols_map=SymbolsMap.load(symbols_map_path),
        ignore_arcs=ignore_arcs)
    formatted = mapped.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_prep_accents_ids(prep_dir))
    print("\n" + formatted)
    _save_text_csv(text_dir, mapped)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
def ipa_convert_text(base_dir: str, prep_name: str, text_name: str, ignore_tones: bool = False, ignore_arcs: bool = True):
    """Convert the sentences of `text_name` to IPA, save the result, and
    refresh the accent template and unknown-symbol check."""
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return
    print("Converting text to IPA...")
    symbol_ids, converted = infer_convert_ipa(
        sentences=load_text_csv(text_dir),
        text_symbols=load_text_symbol_converter(text_dir),
        ignore_tones=ignore_tones,
        ignore_arcs=ignore_arcs)
    formatted = converted.get_formatted(
        symbol_id_dict=symbol_ids,
        accent_id_dict=load_prep_accents_ids(prep_dir))
    print("\n" + formatted)
    _save_text_csv(text_dir, converted)
    save_text_symbol_converter(text_dir, symbol_ids)
    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
def eval_checkpoints_main(base_dir: str, train_name: str, select: int, min_it: int, max_it: int):
    """Evaluate the stored checkpoints of training run `train_name` on its
    validation set, restricted to iterations in [min_it, max_it]."""
    train_dir = get_train_dir(base_dir, train_name, create=False)
    assert os.path.isdir(train_dir)

    prep_name = load_prep_name(train_dir)
    prep_dir = get_prepared_dir(base_dir, prep_name)
    symbol_conv = load_prep_symbol_converter(prep_dir)
    speaker_dict = load_prep_speakers_json(prep_dir)
    accent_dict = load_prep_accents_ids(prep_dir)

    logger = prepare_logger()
    eval_checkpoints(
        custom_hparams=None,
        checkpoint_dir=get_checkpoints_dir(train_dir),
        select=select,
        min_it=min_it,
        max_it=max_it,
        n_symbols=len(symbol_conv),
        n_speakers=len(speaker_dict),
        n_accents=len(accent_dict),
        valset=load_valset(train_dir),
        logger=logger)
def train_main(base_dir: str, train_name: str, prep_name: str, warm_start_train_name: Optional[str] = None, warm_start_checkpoint: Optional[int] = None, test_size: float = 0.01, validation_size: float = 0.05, custom_hparams: Optional[Dict[str, str]] = None, split_seed: int = 1234, weights_train_name: Optional[str] = None, weights_checkpoint: Optional[int] = None, use_weights_map: Optional[bool] = None, map_from_speaker: Optional[str] = None):
    """Set up directories, loggers, and data splits for run `train_name` on the
    prepared dataset `prep_name`, optionally warm-starting from another run
    and/or mapping symbol-embedding weights from it, then launch training.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name)
    train_dir = get_train_dir(base_dir, train_name, create=True)
    logs_dir = get_train_logs_dir(train_dir)

    # Three loggers: the Tacotron2 (tensorboard-style) logger, the general
    # training log, and a dedicated checkpoint log.
    taco_logger = Tacotron2Logger(logs_dir)
    logger = prepare_logger(get_train_log_file(logs_dir), reset=True)
    checkpoint_logger = prepare_logger(
        log_file_path=get_train_checkpoints_log_file(logs_dir),
        logger=logging.getLogger("checkpoint-logger"),
        reset=True)

    save_prep_name(train_dir, prep_name)

    trainset, valset = split_dataset(
        prep_dir=prep_dir,
        train_dir=train_dir,
        test_size=test_size,
        validation_size=validation_size,
        split_seed=split_seed)

    # Checkpoint whose weights are (optionally) transferred; None when no
    # weights run/checkpoint was given.
    weights_model = try_load_checkpoint(
        base_dir=base_dir,
        train_name=weights_train_name,
        checkpoint=weights_checkpoint,
        logger=logger)

    # Symbol-weights map between the weights run's prepared data and ours;
    # only loaded when explicitly requested.
    weights_map = None
    if use_weights_map is not None and use_weights_map:
        weights_train_dir = get_train_dir(base_dir, weights_train_name, False)
        weights_prep_name = load_prep_name(weights_train_dir)
        weights_map = load_weights_map(prep_dir, weights_prep_name)

    # Optional warm-start model; None when not requested.
    warm_model = try_load_checkpoint(
        base_dir=base_dir,
        train_name=warm_start_train_name,
        checkpoint=warm_start_checkpoint,
        logger=logger)

    # Bind the destination directory and logger so `train` only has to call
    # `save_callback(checkpoint, ...)`.
    save_callback = partial(
        save_checkpoint,
        save_checkpoint_dir=get_checkpoints_dir(train_dir),
        logger=logger,
    )

    train(
        custom_hparams=custom_hparams,
        taco_logger=taco_logger,
        symbols=load_prep_symbol_converter(prep_dir),
        speakers=load_prep_speakers_json(prep_dir),
        accents=load_prep_accents_ids(prep_dir),
        trainset=trainset,
        valset=valset,
        save_callback=save_callback,
        weights_map=weights_map,
        weights_checkpoint=weights_model,
        warm_model=warm_model,
        map_from_speaker_name=map_from_speaker,
        logger=logger,
        checkpoint_logger=checkpoint_logger,
    )