def make_hparams():
    """Build the default hyperparameter set for the joint glove-embedding model."""
    return makehp.HParams(
        # --- data / sentence limits ---
        max_len_train=0,  # 0 = no length limit
        max_len_dev=0,    # 0 = no length limit
        sentence_max_len=300,
        # --- optimization ---
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # 0.0 = no gradient clipping
        step_decay=True,     # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        # --- encoder architecture ---
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        # --- dropout ---
        attention_dropout=0.33,
        embedding_dropout=0.33,
        relu_dropout=0.33,
        residual_dropout=0.33,
        # --- input features / embeddings ---
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset="ptb",
        model_name="joint",
        embedding_type="glove",  # one of ['glove', 'sskip', 'random']
        embedding_path="./data/glove/glove.gz",
        # Implicit concatenation: the punctuation tag set as one string.
        punctuation="." "``" "''" ":" ",",
        d_char_emb=64,
        tag_emb_dropout=0.33,
        word_emb_dropout=0.33,
        morpho_emb_dropout=0.33,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.33,
        elmo_dropout=0.5,
        # NOTE(review): sibling configs use the key `bert_model`; this one uses
        # `bert_model_path` — confirm which key downstream code reads.
        bert_model_path="./data/bert/large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
    )
def make_hparams():
    """Build the default hyperparameter set for the joint model (random embeddings)."""
    return makehp.HParams(
        # --- data / sentence limits ---
        max_len_train=0,  # 0 = no length limit
        max_len_dev=0,    # 0 = no length limit
        sentence_max_len=300,
        # --- optimization ---
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # 0.0 = no gradient clipping
        step_decay=True,     # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        # --- encoder architecture ---
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        # --- dropout ---
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        # --- input features / embeddings ---
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="joint",
        embedding_type='random',  # one of ['glove', 'sskip', 'random']
        embedding_path="/data/glove.gz",
        # Implicit concatenation: the punctuation tag set as one string.
        punctuation='.' '``' "''" ':' ',',
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
    )
def from_spec(cls, spec, model):
    """Reconstruct an instance from a saved spec dict and a state dict.

    Parameters:
        spec: dict of constructor kwargs; its 'hparams' entry is a plain dict
              that is converted back into a makehp.HParams.
        model: a state dict compatible with load_state_dict.

    Returns the reconstructed instance with weights loaded.

    Older checkpoints lack hparams keys added later; defaults are filled in
    here so legacy specs still load.
    """
    spec = spec.copy()
    # Copy the nested dict too, so filling in defaults below does not
    # mutate the caller's spec in place (the original shallow copy did).
    hparams = dict(spec['hparams'])
    # Backward compatibility with checkpoints saved before these keys existed.
    hparams.setdefault('joint_pos', False)
    hparams.setdefault('use_xlnet', False)
    if 'model' not in hparams:
        hparams['model'] = "bert"
        hparams['bert_model'] = "bert-large-uncased-whole-word-masking"
    spec['hparams'] = makehp.HParams(**hparams)
    res = cls(**spec)
    res.load_state_dict(model)
    return res
def make_hparams():
    """Build the default hyperparameter set for the multitask BERT model."""
    return makehp.HParams(
        # --- data / sequence limits ---
        max_len_train=0,  # 0 = no length limit
        max_len_dev=0,    # 0 = no length limit
        max_seq_length=256,
        # --- optimization ---
        learning_rate=3e-5,
        # learning_rate_warmup_steps=160,
        # clip_grad_norm=0.,  # no clipping
        # step_decay=True,  # note that disabling step decay is not implemented
        # step_decay_factor=0.5,
        # step_decay_patience=5,
        # --- task sampling probabilities ---
        p_ptb=0.5,
        p_constmask=5,
        p_srlmask=1,
        p_wordmask=1,
        p_tokenmask=0,
        # --- joint-model settings ---
        partitioned=True,
        use_only_bert=False,
        # use_cat=False,
        # use_lstm = False,
        joint_syn=True,
        joint_srl=True,
        joint_pos=True,
        # --- SRL settings ---
        const_lada=0.5,
        labmda_verb=0.6,
        labmda_span=0.6,
        max_num_span=300,
        max_num_verb=30,
        use_srl_jointdecode=True,
        # --- task layer settings ---
        num_layers=2,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        d_score_hidden=256,
        d_span=512,
        # --- dropout ---
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        # --- model / pretrained encoder ---
        model_name="multitask-bert",
        # Implicit concatenation: the punctuation tag set as one string.
        punctuation='.' '``' "''" ':' ',',
        model="bert",
        use_sparse=False,
        use_electra=False,
        use_alltoken=False,
        bert_model="",  # e.g. "bert-base-uncased"
        bert_do_lower_case=True,
        bert_transliterate="",
    )
def make_hparams():
    """Build the default hyperparameter set for the Label-Attention-Layer model."""
    return makehp.HParams(
        # --- data / sentence limits ---
        max_len_train=0,  # 0 = no length limit
        max_len_dev=0,    # 0 = no length limit
        sentence_max_len=300,
        # --- optimization ---
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # 0.0 = no gradient clipping
        step_decay=True,     # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        # --- encoder architecture ---
        partitioned=True,
        use_cat=False,
        const_lada=0.5,
        num_layers=12,
        d_model=1024,
        num_heads=8,
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        # --- Label Attention Layer ---
        use_lal=True,                # Whether the LAL is used at all
        lal_d_kv=64,                 # Dimension of Key and Query Vectors in the LAL
        lal_d_proj=64,               # Dimension of the output vector from each label attention head
        lal_resdrop=True,            # True means the LAL uses Residual Dropout
        lal_pwff=True,               # True means the LAL has a Position-wise Feed-forward Layer
        lal_q_as_matrix=False,       # False means the LAL uses learned query vectors
        lal_partitioned=True,        # Partitioned as per the Berkeley Self-Attentive Parser
        lal_combine_as_self=False,   # False means the LAL uses concatenation
        # --- dropout ---
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        # --- input features / embeddings ---
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_xlnet=False,
        use_roberta=False,
        use_bert_only=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="joint",
        embedding_type='random',  # one of ['glove', 'sskip', 'random']
        embedding_path="/data/glove.gz",
        # Implicit concatenation: the punctuation tag set as one string.
        punctuation='.' '``' "''" ':' ',',
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-large-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
        xlnet_model="xlnet-large-cased",
        xlnet_do_lower_case=False,
        pad_left=False,
        roberta_model="roberta-large",
        roberta_do_lower_case=False,
    )
def make_hparams():
    """Build the default hyperparameter set for the dep+const multitask model."""
    return makehp.HParams(
        # --- data / sentence limits ---
        max_len_train=0,  # 0 = no length limit
        max_len_dev=0,    # 0 = no length limit
        sentence_max_len=300,
        # --- optimization ---
        learning_rate=0.0008,
        learning_rate_warmup_steps=160,
        clip_grad_norm=0.0,  # 0.0 = no gradient clipping
        step_decay=True,     # note that disabling step decay is not implemented
        step_decay_factor=0.5,
        step_decay_patience=5,
        # --- joint-task switches ---
        partitioned=True,
        use_cat=False,
        use_lstm=False,
        joint_syn_dep=False,
        joint_syn_const=False,
        joint_srl_dep=False,
        joint_srl_span=False,
        joint_pos=False,
        use_gold_predicate=False,
        use_bispan_respresent=False,
        use_syncatspan=False,
        use_addspan=False,
        use_catspan=False,
        use_srl_biaffine=False,
        use_srl_dot=False,
        use_srl_jointdecode=False,
        # --- SRL settings ---
        const_lada=0.5,
        labmda_verb=0.6,
        labmda_span=0.6,
        max_num_span=300,
        max_num_verb=30,
        use_span_ff=False,
        use_prespan_ff=False,
        use_verb_ff=False,
        use_softmax_verb=False,
        use_softmax_span=False,
        use_softmax_srlabel=True,
        # --- encoder architecture ---
        num_layers=12,
        d_model=1024,
        num_heads=8,  # 12
        d_kv=64,
        d_ff=2048,
        d_label_hidden=250,
        d_biaffine=1024,
        d_score_hidden=256,
        d_verb=512,
        d_span=512,
        d_prespan=512,
        # --- dropout ---
        attention_dropout=0.2,
        embedding_dropout=0.2,
        relu_dropout=0.2,
        residual_dropout=0.2,
        # --- input features / embeddings ---
        use_tags=False,
        use_words=False,
        use_elmo=False,
        use_bert=False,
        use_chars_lstm=False,
        dataset='ptb',
        model_name="dep+const",
        embedding_type='random',  # one of ['glove', 'sskip', 'random']
        embedding_path="/data/glove.gz",
        # Implicit concatenation: the punctuation tag set as one string.
        punctuation='.' '``' "''" ':' ',',
        d_char_emb=64,  # A larger value may be better for use_chars_lstm
        tag_emb_dropout=0.2,
        word_emb_dropout=0.4,
        morpho_emb_dropout=0.2,
        timing_dropout=0.0,
        char_lstm_input_dropout=0.2,
        elmo_dropout=0.5,  # Note that this semi-stacks with morpho_emb_dropout!
        bert_model="bert-87-uncased",
        bert_do_lower_case=True,
        bert_transliterate="",
    )