@classmethod
def from_spec(cls, spec, model):
    spec = spec.copy()
    hparams = spec['hparams']
    if 'use_chars_concat' in hparams and hparams['use_chars_concat']:
        raise NotImplementedError("Support for use_chars_concat has been removed")
    # Backfill hparams that were introduced after older checkpoints were saved
    if 'sentence_max_len' not in hparams:
        hparams['sentence_max_len'] = 300
    if 'use_elmo' not in hparams:
        hparams['use_elmo'] = False
    if 'elmo_dropout' not in hparams:
        hparams['elmo_dropout'] = 0.5
    if 'use_bert' not in hparams:
        hparams['use_bert'] = False
    if 'use_bert_only' not in hparams:
        hparams['use_bert_only'] = False
    if 'predict_tags' not in hparams:
        hparams['predict_tags'] = False
    if 'bert_transliterate' not in hparams:
        hparams['bert_transliterate'] = ""
    spec['hparams'] = nkutil.HParams(**hparams)
    res = cls(**spec)
    if use_cuda:
        res.cpu()
    if not hparams['use_elmo']:
        res.load_state_dict(model)
    else:
        # ELMo weights are not saved in the checkpoint: keep the freshly
        # initialized ELMo parameters and load everything else from `model`
        state = {k: v for k, v in res.state_dict().items() if k not in model}
        state.update(model)
        res.load_state_dict(state)
    if use_cuda:
        res.cuda()
    return res
@classmethod
def from_spec(cls, spec, model):
    spec = spec.copy()
    hparams = spec['hparams']
    # Backfill hparams that were introduced after older checkpoints were saved
    if 'sentence_max_len' not in hparams:
        hparams['sentence_max_len'] = 300
    if 'use_elmo' not in hparams:
        hparams['use_elmo'] = False
    if 'elmo_dropout' not in hparams:
        hparams['elmo_dropout'] = 0.5
    spec['hparams'] = nkutil.HParams(**hparams)
    res = cls(**spec)
    if use_cuda:
        res.cpu()
    if not hparams['use_elmo']:
        res.load_state_dict(model)
    else:
        # ELMo weights are not saved in the checkpoint: keep the freshly
        # initialized ELMo parameters and load everything else from `model`
        state = {k: v for k, v in res.state_dict().items() if k not in model}
        state.update(model)
        res.load_state_dict(state)
    if use_cuda:
        res.cuda()
    return res
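# Both from_spec variants reference a module-level `use_cuda` flag that is
# not defined in this snippet; below is a minimal sketch of the assumed
# definition, plus an illustrative loading call. The `NKChartParser` name,
# the checkpoint filename, and the {'spec': ..., 'state_dict': ...} layout
# are assumptions for illustration, not confirmed by this file.
import torch

use_cuda = torch.cuda.is_available()

info = torch.load("model_dev.pt", map_location="cpu")
parser = NKChartParser.from_spec(info['spec'], info['state_dict'])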
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        max_consecutive_decays=3,  # establishes a termination criterion

        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_tag_hidden=250,
        tag_loss_scale=5.0,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_elmo=False,
        use_bert=False,
        use_bert_only=False,
        predict_tags=False,

        d_char_emb=32,  # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!

        bert_model="bert-base-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",

        use_syntactic=False,
        embed_layer=-1,
        word_level="first",
        freeze_embeddings=True,
        syntactic_checkpoint="checkpoints/model.pt",
    )
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        max_consecutive_decays=3,  # establishes a termination criterion

        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_tag_hidden=250,
        tag_loss_scale=5.0,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_elmo=False,
        use_bert=False,
        use_bert_only=False,
        predict_tags=False,

        d_char_emb=32,  # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!

        bert_model="bert-base-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",

        zero_empty=False,
        metric="dot",
        batch_cky=False,
        label_weights=False,
        no_mlp=False,
        use_label_weights=False,
        # Integration strategy for retrieved labels:
        # - "soft" mixes in representation space
        # - "hard" mixes in score space
        integration="hard",  # ["soft", "hard"]
    )
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,

        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_chars_concat=False,
        use_elmo=False,

        d_char_emb=32,  # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!

        task='language_modeling',
        fp16=False,
        vocab='/private/home/yinhanliu/BiLM_Training/vocab-2016-09-10.txt',
        path='/checkpoint/yinhanliu/20180915/fairseq-lstm-train/layers_6/32_0.001/checkpoint_best.pt',
        sample_break_mode='eos',
        raw_text=False,
        data='/private/home/abaevski/data/gbw',
        output_dictionary_size=-1,
        character_embeddings=True,
    )
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,

        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_chars_concat=False,
        use_elmo=False,

        d_char_emb=32,  # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
    )
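# All make_hparams variants above build the same kind of nkutil.HParams
# container, only with different defaults. A minimal usage sketch follows;
# the attribute-style access and assignment shown here are assumptions
# about nkutil.HParams (it is constructed from kwargs), not behavior
# demonstrated in this file.
hparams = make_hparams()

# Override defaults for a smaller configuration.
hparams.num_layers = 2
hparams.d_model = 256

print(hparams.num_layers, hparams.use_elmo)  # expected: 2 False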
def create_hparams():
    hparams = nkutil.HParams(
        ################################
        # PolyPhonic Parameters       #
        ################################
        num_classes=1665,  # ?
        class2idx="./filelists/uni_class2idx.json",
        merge_cedict="./filelists/universal_cedict.json",
        saved_model_path_poly="./save/poly_only/97.98_model.pt",
        saved_model_path_structure_poly="./save/poly_only_syntax_frozen/97.16_model.pt",
        train_file="./filelists/train_polyphonic.sent",
        train_label="./filelists/train_polyphonic.lb",
        val_file="./filelists/dev_polyphonic.sent",
        val_label="./filelists/dev_polyphonic.lb",
        test_file="./filelists/test_polyphonic.sent",
        test_label="./filelists/test_polyphonic.lb",
        poly_batch_size=32,
        poly_max_length=512,
        poly_epochs=1500,
        poly_lr=5e-5,
        use_output_mask=True,
        # Control whether syntax structure information is used in TTS
        poly_use_structure=True,
        tts_use_structure=True,
        encoder_input_dim=[812, 512, 512],

        ################################
        # Experiment Parameters       #
        ################################
        epochs=500,
        iters_per_checkpoint=500,
        seed=4321,  # ?
        dynamic_loss_scaling=True,
        fp16_run=False,  # ?
        distributed_run=False,  # ?
        dist_backend="nccl",
        dist_url="tcp://localhost:54321",  # str: specifies how the communicating processes are initialized
        cudnn_enabled=True,
        cudnn_benchmark=False,
        ignore_layers=['embedding.weight'],
        synth_batch_size=1,

        ################################
        # Data Parameters             #
        ################################
        # load_mel_from_disk=False,
        load_mel_from_disk=True,  # ? toggled manually
        pretrain_model_path_structure='./models/ch_bert_bmes_dev=93.97.pt',
        saved_model_path_sandhi_structure='./save/poly_tts_CNN_syntax_frozen/96.39_model.pt',
        saved_model_path_sandhi='./save/poly_tts_CNN/96.84_model.pt',
        training_files='filelists/bznsyp_character_audio_text_train_filelist.txt',
        validation_files='filelists/bznsyp_character_audio_text_val_filelist.txt',
        mel_training_files='filelists/mel-bznsyp_character_audio_text_train_filelist.txt',
        mel_validation_files='filelists/mel-bznsyp_character_audio_text_val_filelist.txt',
        polyphone_dict_files='filelists/polyphone_dict.json',
        mask_dict_files='filelists/polyphone_mask.json',
        text_cleaners=['english_cleaners'],

        ################################
        # Audio Parameters            #
        ################################
        max_wav_value=32768.0,
        sampling_rate=16000,
        filter_length=1024,
        # hop_length=256,
        # win_length=1024,
        hop_length=200,
        win_length=800,
        n_mel_channels=80,
        mel_fmin=0.0,
        mel_fmax=8000.0,
        signal_normalization=True,
        allow_clipping_in_normalization=True,
        use_lws=False,
        max_abs_value=4.,
        symmetric_mels=True,
        min_level_db=-100,
        ref_level_db=20,
        magnitude_power=2.,
        fmin=55,
        fmax=7600,
        power=1.5,
        griffin_lim_iters=60,
        preemphasize=True,
        preemphasis=0.97,

        ################################
        # Model Parameters            #
        ################################
        n_yinsu_symbols=len(yinsu_symbols),
        n_character_symbols=len(character_symbols),
        n_pinyin_symbols=1665,
        character_symbols_embedding_dim=512,
        yinsu_symbols_embedding_dim=512,
        structure_feature_dim=300,

        # Encoder parameters
        encoder_kernel_size=5,
        encoder_n_convolutions=3,
        # encoder_embedding_dim=1836,  # = 1024 + 512 + 300
        # encoder_embedding_dim=1324,  # = 1024 + 300
        encoder_embedding_dim=512,
        encoder_output_dim=[512, 512, 512],

        # Decoder parameters
        n_frames_per_step=1,  # currently only 1 is supported
        decoder_rnn_dim=1024,
        prenet_dim=256,
        max_decoder_steps=1000,
        gate_threshold=0.5,
        p_attention_dropout=0.1,
        p_decoder_dropout=0.1,

        # Attention parameters
        attention_rnn_dim=1024,
        attention_dim=128,

        # Location Layer parameters
        attention_location_n_filters=32,
        attention_location_kernel_size=31,

        # Mel-post processing network parameters
        postnet_embedding_dim=512,
        postnet_kernel_size=5,
        postnet_n_convolutions=5,

        ################################
        # Optimization Hyperparameters #
        ################################
        use_saved_learning_rate=False,
        learning_rate=1e-4,
        weight_decay=1e-6,
        grad_clip_thresh=1.0,
        batch_size=32,
        mask_padding=True,  # set model's padded outputs to padded values
    )
    return hparams
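# The hop and window settings above fix the mel frame rate that the rest of
# the TTS pipeline must agree on. A small sanity-check sketch, assuming the
# symbol tables (yinsu_symbols, character_symbols) are importable and that
# HParams exposes its kwargs as attributes.
hp = create_hparams()

# At 16 kHz, a 200-sample hop is 12.5 ms per mel frame (80 frames/sec),
# and an 800-sample window covers 50 ms of audio.
hop_ms = 1000.0 * hp.hop_length / hp.sampling_rate
win_ms = 1000.0 * hp.win_length / hp.sampling_rate
frames_per_sec = hp.sampling_rate / hp.hop_length
assert (hop_ms, win_ms, frames_per_sec) == (12.5, 50.0, 80.0)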