Example #1
def make_hparams():
    return makehp.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        attention_dropout=0.33,
        embedding_dropout=0.33,
        relu_dropout=0.33,
        residual_dropout=0.33,
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset="ptb",
        model_name="joint",
        embedding_type="glove",  # one of 'glove', 'sskip', 'random'
        embedding_path="./data/glove/glove.gz",
        punctuation="."
        "``"
        "''"
        ":"
        ",",
        d_char_emb=64,
        tag_emb_dropout=0.33,
        word_emb_dropout=0.33,
        morpho_emb_dropout=0.33,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.33,
        elmo_dropout=0.5,
        bert_model_path="./data/bert/large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
    )
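A minimal usage sketch for the block above, assuming makehp.HParams stores each keyword argument as an attribute (as in the nkutil.HParams helper this parser family is built on); the overridden values are purely illustrative:

def demo_hparams_usage():
    # Hypothetical sketch; assumes HParams exposes settings as attributes.
    hparams = make_hparams()
    hparams.num_layers = 2        # illustrative override for a quick experiment
    hparams.learning_rate = 1e-4  # illustrative override
    print(hparams.d_model)        # attribute access -> 1024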
Example #2
def make_hparams():
    return makehp.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="joint",
        embedding_type='random',  # one of 'glove', 'sskip', 'random'
        embedding_path="/data/glove.gz",
        punctuation='.' '``' "''" ':' ',',  # implicit concatenation: the string ".``'':,"
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
    )
Example #3
File: Zmodel.py  Project: cooelf/LIMIT-BERT
    def from_spec(cls, spec, model):
        spec = spec.copy()
        hparams = spec['hparams']
        if 'joint_pos' not in hparams:
            hparams['joint_pos'] = False
        if 'use_xlnet' not in hparams:
            hparams['use_xlnet'] = False
        if 'model' not in hparams:
            hparams['model'] = "bert"
        # Hard-coded override: always load whole-word-masking BERT weights.
        hparams['bert_model'] = "bert-large-uncased-whole-word-masking"

        spec['hparams'] = makehp.HParams(**hparams)
        res = cls(**spec)
        # if use_cuda:
        #     res.cpu()
        res.load_state_dict(model)
        # if use_cuda:
        #     res.cuda()
        return res
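For orientation, a hedged sketch of the checkpoint round trip that from_spec implies; the Parser class name and the checkpoint keys are assumptions, and only torch.save / torch.load / state_dict are standard PyTorch:

import torch

# Saving (given some trained `parser` instance; key names are assumptions):
torch.save({"spec": parser.spec, "model": parser.state_dict()}, "parser.pt")

# Loading mirrors from_spec above: the second argument is the saved state dict.
checkpoint = torch.load("parser.pt", map_location="cpu")
parser = Parser.from_spec(checkpoint["spec"], checkpoint["model"])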
Example #4
def make_hparams():
    return makehp.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        max_seq_length=256,
        learning_rate=3e-5,
        # learning_rate_warmup_steps=160,
        # clip_grad_norm=0., #no clipping
        # step_decay=True, # note that disabling step decay is not implemented
        # step_decay_factor=0.5,
        # step_decay_patience=5,

        # Probability settings
        p_ptb=0.5,
        p_constmask=5,
        p_srlmask=1,
        p_wordmask=1,
        p_tokenmask=0,

        # Joint settings
        partitioned=True,
        use_only_bert=False,
        # use_cat=False,
        # use_lstm = False,
        joint_syn=True,
        joint_srl=True,
        joint_pos=True,

        # SRL settings
        const_lada=0.5,
        labmda_verb=0.6,
        labmda_span=0.6,
        max_num_span=300,
        max_num_verb=30,
        use_srl_jointdecode=True,

        # Task layer settings
        num_layers=2,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        d_score_hidden=256,
        d_span=512,
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        model_name="multitask-bert",
        punctuation='.' '``' "''" ':' ',',  # implicit concatenation: the string ".``'':,"
        model="bert",
        use_sparse=False,
        use_electra=False,
        use_alltoken=False,
        bert_model="",  #""bert-base-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
    )
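The p_* values above sum to more than 1, so they read as relative sampling weights rather than probabilities. A hypothetical illustration of how such weights could select a masking strategy per batch; sample_mask_strategy is not from the source:

import random

def sample_mask_strategy(hp):
    # Hypothetical helper: choose one strategy with probability proportional
    # to its p_* weight; weights need not sum to 1, and a zero weight
    # (p_tokenmask above) is simply never chosen.
    names = ["constmask", "srlmask", "wordmask", "tokenmask"]
    weights = [hp.p_constmask, hp.p_srlmask, hp.p_wordmask, hp.p_tokenmask]
    return random.choices(names, weights=weights, k=1)[0]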
Example #5
def make_hparams():
    return makehp.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,

        # Label Attention Layer
        use_lal=True,  # Whether the LAL is used at all
        lal_d_kv=64,  # Dimension of Key and Query Vectors in the LAL
        lal_d_proj=64,  # Dimension of the output vector from each label attention head
        lal_resdrop=True,  # True means the LAL uses Residual Dropout
        lal_pwff=True,  # True means the LAL has a Position-wise Feed-forward Layer
        lal_q_as_matrix=False,  # False means the LAL uses learned query vectors
        lal_partitioned=True,  # Partitioned as per the Berkeley Self-Attentive Parser
        lal_combine_as_self=False,  # False means the LAL uses concatenation
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_roberta=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="joint",
        embedding_type='random',  # one of 'glove', 'sskip', 'random'
        embedding_path="/data/glove.gz",
        punctuation='.' '``' "''" ':' ',',  # implicit concatenation: the string ".``'':,"
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
        roberta_model="roberta-large",
        roberta_do_lower_case=False,
    )
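One detail of the Label Attention Layer block worth spelling out: with lal_combine_as_self=False the per-label head outputs are concatenated, so the LAL output width grows linearly with the number of labels. A back-of-envelope check, where the label count is illustrative rather than taken from the source:

lal_d_proj = 64   # per-head output size, from the hparams above
num_labels = 112  # assumption: illustrative label-set size
print(num_labels * lal_d_proj)  # 7168 features per token after concatenation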
Example #6
File: main.py  Project: DoodleJZ/ParsingAll
def make_hparams():
    return makehp.HParams(
        max_len_train=0,  # no length limit
        max_len_dev=0,  # no length limit
        sentence_max_len=300,
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # no clipping
        step_decay=True,  # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        partitioned=True,
        use_cat=False,
        use_lstm=False,
        joint_syn_dep=False,
        joint_syn_const=False,
        joint_srl_dep=False,
        joint_srl_span=False,
        joint_pos=False,
        use_gold_predicate=False,
        use_bispan_respresent=False,
        use_syncatspan=False,
        use_addspan=False,
        use_catspan=False,
        use_srl_biaffine=False,
        use_srl_dot=False,
        use_srl_jointdecode=False,
        const_lada=0.5,
        labmda_verb=0.6,
        labmda_span=0.6,
        max_num_span=300,
        max_num_verb=30,
        use_span_ff=False,
        use_prespan_ff=False,
        use_verb_ff=False,
        use_softmax_verb=False,
        use_softmax_span=False,
        use_softmax_srlabel=True,
        num_layers=12,
        d_model=1024,
        num_heads=8,  # 12
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        d_score_hidden=256,
        d_verb=512,
        d_span=512,
        d_prespan=512,
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="dep+const",
        embedding_type='random',  # one of 'glove', 'sskip', 'random'
        embedding_path="/data/glove.gz",
        punctuation='.' '``' "''" ':' ',',  # implicit concatenation: the string ".``'':,"
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-87-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
    )
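Finally, every example above sets partitioned=True, which in the Kitaev & Klein (2018) self-attentive parser factors the model dimension into independent content and position halves. A sketch of the split implied by the shared defaults; the halving is the published scheme, but the variable names here are illustrative:

d_model, d_kv = 1024, 64
d_content, d_position = d_model // 2, d_model // 2  # 512 content + 512 position dims
# Each attention head partitions its key/value space the same way:
d_kv_content, d_kv_position = d_kv // 2, d_kv // 2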