Code Example #1
    @classmethod
    def from_spec(cls, spec, model):
        spec = spec.copy()
        hparams = spec['hparams']
        if 'use_chars_concat' in hparams and hparams['use_chars_concat']:
            raise NotImplementedError("Support for use_chars_concat has been removed")
        if 'sentence_max_len' not in hparams:
            hparams['sentence_max_len'] = 300
        if 'use_elmo' not in hparams:
            hparams['use_elmo'] = False
        if 'elmo_dropout' not in hparams:
            hparams['elmo_dropout'] = 0.5
        if 'use_bert' not in hparams:
            hparams['use_bert'] = False
        if 'use_bert_only' not in hparams:
            hparams['use_bert_only'] = False
        if 'predict_tags' not in hparams:
            hparams['predict_tags'] = False
        if 'bert_transliterate' not in hparams:
            hparams['bert_transliterate'] = ""

        spec['hparams'] = nkutil.HParams(**hparams)
        res = cls(**spec)
        # Load weights on the CPU, then move back to the GPU afterwards.
        if use_cuda:
            res.cpu()
        if not hparams['use_elmo']:
            res.load_state_dict(model)
        else:
            # Keep freshly initialized values for any parameters the
            # checkpoint does not cover (e.g. the ELMo weights), then
            # overlay the saved parameters on top.
            state = {k: v for k, v in res.state_dict().items() if k not in model}
            state.update(model)
            res.load_state_dict(state)
        if use_cuda:
            res.cuda()
        return res
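In the source project, use_cuda is presumably a module-level flag along the lines of torch.cuda.is_available(), which is why weights are loaded on the CPU and the model is moved back to the GPU afterwards. As a minimal sketch of how a caller might restore a saved model with this method, assuming a checkpoint stored as a dict with 'spec' and 'state_dict' keys and a parser class named NKChartParser (both assumptions, not confirmed by the snippet itself):

import torch
import parse_nk  # assumed module name for the file defining the parser

# Assumed checkpoint layout: {'spec': constructor spec, 'state_dict': weights}.
# map_location lets a GPU-trained checkpoint open on a CPU-only machine.
info = torch.load("models/parser.pt", map_location="cpu")

# from_spec rebuilds the model from its spec, backfills hparams that were
# added after the checkpoint was saved, and restores the trained weights.
parser = parse_nk.NKChartParser.from_spec(info["spec"], info["state_dict"])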
Code Example #2
File: parse_nk.py Project: baoy-nlp/FAParser
    @classmethod
    def from_spec(cls, spec, model):
        spec = spec.copy()
        hparams = spec['hparams']
        if 'sentence_max_len' not in hparams:
            hparams['sentence_max_len'] = 300
        if 'use_elmo' not in hparams:
            hparams['use_elmo'] = False
        if 'elmo_dropout' not in hparams:
            hparams['elmo_dropout'] = 0.5

        spec['hparams'] = nkutil.HParams(**hparams)
        res = cls(**spec)
        if use_cuda:
            res.cpu()
        if not hparams['use_elmo']:
            res.load_state_dict(model)
        else:
            state = {
                k: v
                for k, v in res.state_dict().items() if k not in model
            }
            state.update(model)
            res.load_state_dict(state)
        if use_cuda:
            res.cuda()
        return res
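Compared with Example #1, this variant backfills only the ELMo-era defaults (sentence_max_len, use_elmo, elmo_dropout) and has no handling for the later BERT-related options or the removed use_chars_concat flag.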
Code Example #3
def make_hparams():
    return nkutil.HParams(
        max_len_train=0, # no length limit
        max_len_dev=0, # no length limit

        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0., # no clipping
        step_decay=True, # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        max_consecutive_decays=3, # establishes a termination criterion

        partitioned=True,
        num_layers_position_only=0,

        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_tag_hidden=250,
        tag_loss_scale=5.0,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_elmo=False,
        use_bert=False,
        use_bert_only=False,
        predict_tags=False,

        d_char_emb=32, # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5, # Note that this semi-stacks with morpho_emb_dropout!

        bert_model="bert-base-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        use_syntactic=False,
        embed_layer=-1,
        word_level="first",
        freeze_embeddings=True,
        syntactic_checkpoint="checkpoints/model.pt",
    )
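Every factory in this listing leans on nkutil.HParams to turn keyword arguments into an attribute-style settings object (from_spec above checks membership on the plain dict before wrapping it). Below is a minimal sketch of the interface these snippets appear to assume; the real nkutil class may offer more, and the to_dict helper here is an assumption rather than its confirmed API:

class HParams:
    """Minimal stand-in for nkutil.HParams: keyword arguments become
    attributes, so settings read as hparams.d_model or hparams.num_layers."""

    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)

    def to_dict(self):
        # Round-trip back to a plain dict, e.g. for storing inside a spec.
        return dict(vars(self))

# Usage with either the stand-in or the real class: take the defaults, then
# override one setting for a particular run (assumes plain attribute
# assignment is permitted, as it is for the stand-in above).
hparams = make_hparams()
hparams.learning_rate = 0.0004  # halve the 0.0008 default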
Code Example #4
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        max_consecutive_decays=3,  # establishes a termination criterion
        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_tag_hidden=250,
        tag_loss_scale=5.0,
        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,
        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_elmo=False,
        use_bert=False,
        use_bert_only=False,
        predict_tags=False,
        d_char_emb=32,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-base-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        zero_empty=False,
        metric="dot",
        batch_cky=False,
        label_weights=False,
        no_mlp=False,
        use_label_weights=False,

        # Integration strategy of retrieved labels
        # - soft mixes in representation space
        # - hard mixes in score space
        integration="hard",  # ["soft", "hard"]
    )
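The integration comment above distinguishes where retrieved-label information is mixed in. A hypothetical consumer of the flag, written only to illustrate the two branches the comment describes (the combine function, its arguments, and the additive mixing are illustrative, not taken from the project):

def combine(span_repr, span_scores, retrieved_repr, retrieved_scores, hparams):
    if hparams.integration == "soft":
        # "soft": mix in representation space, before any scoring happens.
        return span_repr + retrieved_repr, span_scores
    if hparams.integration == "hard":
        # "hard": mix in score space, after both sides have been scored.
        return span_repr, span_scores + retrieved_scores
    raise ValueError(f"unknown integration mode: {hparams.integration}")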
Code Example #5
def make_hparams():
    return nkutil.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        partitioned=True,
        num_layers_position_only=0,
        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,
        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_chars_concat=False,
        use_elmo=False,
        d_char_emb=32,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        task='language_modeling',
        fp16=False,
        vocab='/private/home/yinhanliu/BiLM_Training/vocab-2016-09-10.txt',
        path='/checkpoint/yinhanliu/20180915/fairseq-lstm-train/layers_6/32_0.001/checkpoint_best.pt',
        sample_break_mode='eos',
        raw_text=False,
        data='/private/home/abaevski/data/gbw',
        output_dictionary_size=-1,
        character_embeddings=True,
    )
Code Example #6
File: main.py Project: lumiqai/self-attentive-parser
def make_hparams():
    return nkutil.HParams(
        max_len_train=0, # no length limit
        max_len_dev=0, # no length limit

        sentence_max_len=300,

        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0., # no clipping
        step_decay=True, # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,

        partitioned=True,
        num_layers_position_only=0,

        num_layers=8,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,

        attention_dropout=0.2,
        embedding_dropout=0.0,
        relu_dropout=0.1,
        residual_dropout=0.2,

        use_tags=False,
        use_words=False,
        use_chars_lstm=False,
        use_chars_concat=False,
        use_elmo=False,

        d_char_emb=32, # A larger value may be better for use_chars_lstm

        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5, # Note that this semi-stacks with morpho_emb_dropout!
    )
Code Example #7
File: hparams.py Project: Chenxuey20/Char2Prosody
def create_hparams():
    hparams = nkutil.HParams(
        ################################
        # PolyPhonic Parameters        #
        ################################
        num_classes = 1665,    ##?
        class2idx = "./filelists/uni_class2idx.json",
        merge_cedict = "./filelists/universal_cedict.json",
        saved_model_path_poly = "./save/poly_only/97.98_model.pt",
        saved_model_path_structure_poly = "./save/poly_only_syntax_frozen/97.16_model.pt",

        train_file = "./filelists/train_polyphonic.sent",
        train_label = "./filelists/train_polyphonic.lb",
        val_file = "./filelists/dev_polyphonic.sent",
        val_label = "./filelists/dev_polyphonic.lb",
        test_file = "./filelists/test_polyphonic.sent",
        test_label = "./filelists/test_polyphonic.lb",

        poly_batch_size = 32,
        poly_max_length = 512,
        poly_epochs = 1500,
        poly_lr = 5e-5,
        use_output_mask = True,

        # control whether use syntax structure information in TTS
        poly_use_structure = True,
        tts_use_structure = True,
        encoder_input_dim=[812, 512, 512],
        ################################
        # Experiment Parameters        #
        ################################
        epochs=500,
        iters_per_checkpoint=500,
        seed=4321,   #?
        dynamic_loss_scaling=True,
        fp16_run=False,   ##?
        distributed_run=False,   ##?
        dist_backend="nccl",
        dist_url="tcp://localhost:54321",  ## str: this URL specifies how the processes that communicate with each other are initialized.
        cudnn_enabled=True,
        cudnn_benchmark=False,
        ignore_layers=['embedding.weight'],

        synth_batch_size = 1,
        ################################
        # Data Parameters             #
        ################################
        # load_mel_from_disk=False,
        load_mel_from_disk=True,     ##? toggled manually
        pretrain_model_path_structure = './models/ch_bert_bmes_dev=93.97.pt',
        saved_model_path_sandhi_structure = './save/poly_tts_CNN_syntax_frozen/96.39_model.pt',
        saved_model_path_sandhi = './save/poly_tts_CNN/96.84_model.pt',
        training_files='filelists/bznsyp_character_audio_text_train_filelist.txt',
        validation_files='filelists/bznsyp_character_audio_text_val_filelist.txt',
        mel_training_files='filelists/mel-bznsyp_character_audio_text_train_filelist.txt',
        mel_validation_files='filelists/mel-bznsyp_character_audio_text_val_filelist.txt',
        polyphone_dict_files = 'filelists/polyphone_dict.json',
        mask_dict_files = 'filelists/polyphone_mask.json',
        text_cleaners=['english_cleaners'],

        ################################
        # Audio Parameters             #
        ################################
        max_wav_value=32768.0,
        sampling_rate=16000,
        filter_length=1024,
        # hop_length=256,
        # win_length=1024,
        hop_length = 200,
        win_length = 800,
        n_mel_channels=80,
        mel_fmin=0.0,
        mel_fmax=8000.0,
        
        signal_normalization = True,
        allow_clipping_in_normalization = True,
        use_lws=False,
        max_abs_value = 4.,
        symmetric_mels = True,
        min_level_db = -100,
        ref_level_db = 20,
        magnitude_power = 2.,
        fmin = 55,
        fmax = 7600,
        power = 1.5,
        griffin_lim_iters = 60,
        preemphasize = True,
        preemphasis = 0.97,

        ################################
        # Model Parameters             #
        ################################
        n_yinsu_symbols=len(yinsu_symbols),
        n_character_symbols=len(character_symbols),
        n_pinyin_symbols=1665,
        character_symbols_embedding_dim=512,
        yinsu_symbols_embedding_dim=512,
        structure_feature_dim=300,
        # Encoder parameters
        encoder_kernel_size=5,
        encoder_n_convolutions=3,
        # encoder_embedding_dim=1836,  # = 1024 + 512 + 300
        # encoder_embedding_dim=1324,  # = 1024 + 300
        encoder_embedding_dim=512,     # 512
        encoder_output_dim=[512, 512, 512],

        # Decoder parameters
        n_frames_per_step=1,  # currently only 1 is supported
        decoder_rnn_dim=1024,
        prenet_dim=256,
        max_decoder_steps=1000,
        gate_threshold=0.5,
        p_attention_dropout=0.1,
        p_decoder_dropout=0.1,

        # Attention parameters
        attention_rnn_dim=1024,
        attention_dim=128,

        # Location Layer parameters
        attention_location_n_filters=32,
        attention_location_kernel_size=31,

        # Mel-post processing network parameters
        postnet_embedding_dim=512,
        postnet_kernel_size=5,
        postnet_n_convolutions=5,

        ################################
        # Optimization Hyperparameters #
        ################################
        use_saved_learning_rate=False,
        learning_rate=1e-4,
        weight_decay=1e-6,
        grad_clip_thresh=1.0,
        batch_size=32,
        mask_padding=True,  # set model's padded outputs to padded values
    )
    return hparams
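The audio block pins the frame geometry: at sampling_rate=16000, hop_length=200 is a 12.5 ms hop and win_length=800 a 50 ms analysis window, i.e. 80 mel frames per second. A quick check using only values from the hparams above:

sampling_rate = 16000  # Hz, from the Audio Parameters block
hop_length = 200       # samples between successive frames
win_length = 800       # samples in each analysis window

print(1000 * hop_length / sampling_rate)  # 12.5 -> hop in milliseconds
print(1000 * win_length / sampling_rate)  # 50.0 -> window in milliseconds
print(sampling_rate / hop_length)         # 80.0 -> mel frames per second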
