def collect_params(all_params, params):
    """Extract from ``all_params`` the hyperparameters named in ``params``.

    Returns a new ``utils.HParams`` holding, for every key present in
    ``params``, the corresponding value read off ``all_params``.
    """
    subset = utils.HParams()

    for name in six.iterkeys(params.values()):
        subset.add_hparam(name, getattr(all_params, name))

    return subset
def base_params():
    """Base transformer hyperparameters.

    NOTE(review): ``bos`` is deliberately set to the ``<eos>`` token here —
    this mirrors the upstream convention; confirm before changing.
    """
    spec = {
        # Special vocabulary symbols
        "pad": "<pad>",
        "bos": "<eos>",
        "eos": "<eos>",
        "unk": "<unk>",
        # Model dimensions
        "hidden_size": 512,
        "filter_size": 2048,
        "num_heads": 8,
        "num_encoder_layers": 6,
        "num_decoder_layers": 6,
        # Regularization
        "attention_dropout": 0.0,
        "residual_dropout": 0.1,
        "relu_dropout": 0.0,
        "label_smoothing": 0.1,
        "normalization": "after",
        # Weight sharing
        "shared_embedding_and_softmax_weights": False,
        "shared_source_target_embedding": False,
        # Override default parameters
        "warmup_steps": 4000,
        "train_steps": 100000,
        "learning_rate": 7e-4,
        "learning_rate_schedule": "linear_warmup_rsqrt_decay",
        "batch_size": 4096,
        "fixed_batch_size": False,
        "adam_beta1": 0.9,
        "adam_beta2": 0.98,
        "adam_epsilon": 1e-9,
        "clip_grad_norm": 0.0,
    }
    return utils.HParams(**spec)
def base_params():
    """Base transformer hyperparameters, extended with cache settings.

    Adds encoder/decoder cache configuration (size, query/summary/update
    strategies) and feature toggles on top of the standard base config.
    NOTE(review): ``bos`` is deliberately the ``<eos>`` token, matching the
    surrounding codebase's convention.
    """
    spec = {
        # Special vocabulary symbols
        "pad": "<pad>",
        "bos": "<eos>",
        "eos": "<eos>",
        "unk": "<unk>",
        # Model dimensions
        "hidden_size": 512,
        "filter_size": 2048,
        "num_heads": 8,
        "num_encoder_layers": 6,
        "num_decoder_layers": 6,
        # Regularization
        "attention_dropout": 0.0,
        "residual_dropout": 0.1,
        "relu_dropout": 0.0,
        "label_smoothing": 0.1,
        "normalization": "after",
        # Weight sharing
        "shared_embedding_and_softmax_weights": False,
        "shared_source_target_embedding": False,
        # cache params
        "src_cache_N": 5,
        "src_cache_k": 2,
        "src_cache_dk": 512,
        "tgt_cache_N": 5,
        "tgt_cache_k": 2,
        "tgt_cache_dk": 512,
        "src_query_method": "single",
        "src_summary_method": "last_state",
        "src_update_method": "fifo",
        "tgt_query_method": "single",
        "tgt_summary_method": "last_state",
        "tgt_update_method": "fifo",
        # Feature toggles
        "enable_encoder_cache": True,
        "enable_decoder_cache": True,
        "enable_relative_positional_embedding": False,
        "enable_sentence_embedding": True,
        "enable_residual_gate": False,
        "enable_cache_gate": False,
        "from_torchtext": True,
        # Override default parameters
        "warmup_steps": 4000,
        "train_steps": 100000,
        "learning_rate": 7e-4,
        "learning_rate_schedule": "linear_warmup_rsqrt_decay",
        "batch_size": 4096,
        "fixed_batch_size": False,
        "adam_beta1": 0.9,
        "adam_beta2": 0.98,
        "adam_epsilon": 1e-9,
        "clip_grad_norm": 0.0,
    }
    return utils.HParams(**spec)
def merge_params(params1, params2):
    """Merge two HParams objects into a new one.

    Every entry of ``params1`` is copied first; entries of ``params2``
    then either override an existing key or are added as new hparams.
    Neither input is modified.
    """
    merged = utils.HParams()

    # Seed the result with everything from params1.
    for (name, value) in six.iteritems(params1.values()):
        merged.add_hparam(name, value)

    # Snapshot of the keys present so far, taken once (matches the
    # original's single .values() call before the second loop).
    existing = merged.values()

    for (name, value) in six.iteritems(params2.values()):
        if name in existing:
            # Override
            setattr(merged, name, value)
        else:
            merged.add_hparam(name, value)

    return merged
def default_params():
    """Default runtime parameters for sentence-level decoding/scoring."""
    spec = {
        # I/O and model selection
        "input": None,
        "output": None,
        "vocabulary": None,
        "model": None,
        # vocabulary specific
        "pad": "<pad>",
        "bos": "<bos>",
        "eos": "<eos>",
        "unk": "<unk>",
        "append_eos": False,
        "monte_carlo": False,
        # Devices and batching
        "device_list": [0],
        "decode_batch_size": 32,
        "buffer_size": 10000,
        "level": "sentence",
    }
    return utils.HParams(**spec)
def default_params():
    """Default parameters for beam-search decoding."""
    spec = {
        # I/O
        "input": None,
        "output": None,
        "vocabulary": None,
        # vocabulary specific
        "pad": "<pad>",
        "bos": "<bos>",
        "eos": "<eos>",
        "unk": "<unk>",
        "device_list": [0],
        # decoding
        "top_beams": 1,
        "beam_size": 4,
        "decode_alpha": 0.6,
        "decode_ratio": 1.0,
        "decode_length": 50,
        "decode_batch_size": 32,
    }
    return utils.HParams(**spec)
def default_params():
    """Default training parameters (data, optimizer, schedule, validation).

    NOTE(review): here too ``bos`` is the ``<eos>`` token — intentional in
    this codebase; confirm before changing.
    """
    spec = {
        # I/O and model selection
        "input": ["", ""],
        "output": "",
        "model": "transformer",
        "vocab": ["", ""],
        "pad": "<pad>",
        "bos": "<eos>",
        "eos": "<eos>",
        "unk": "<unk>",
        # Dataset
        "batch_size": 4096,
        "fixed_batch_size": False,
        "min_length": 1,
        "max_length": 256,
        "buffer_size": 10000,
        # Initialization
        "initializer_gain": 1.0,
        "initializer": "uniform_unit_scaling",
        # Regularization
        "scale_l1": 0.0,
        "scale_l2": 0.0,
        # Training
        "initial_step": 0,
        "warmup_steps": 4000,
        "train_steps": 100000,
        "update_cycle": 1,
        "optimizer": "Adam",
        "adam_beta1": 0.9,
        "adam_beta2": 0.999,
        "adam_epsilon": 1e-8,
        "adadelta_rho": 0.95,
        "adadelta_epsilon": 1e-7,
        "pattern": "",
        "clipping": "global_norm",
        "clip_grad_norm": 5.0,
        "learning_rate": 1.0,
        "initial_learning_rate": 0.0,
        "learning_rate_schedule": "linear_warmup_rsqrt_decay",
        "learning_rate_boundaries": [0],
        "learning_rate_values": [0.0],
        "device_list": [0],
        # Checkpoint Saving
        "keep_checkpoint_max": 20,
        "keep_top_checkpoint_max": 5,
        "save_summary": True,
        "save_checkpoint_secs": 0,
        "save_checkpoint_steps": 1000,
        # Validation
        "eval_steps": 2000,
        "eval_secs": 0,
        "top_beams": 1,
        "beam_size": 4,
        "decode_batch_size": 32,
        "decode_alpha": 0.6,
        "decode_ratio": 1.0,
        "decode_length": 50,
        "validation": "",
        "references": "",
    }
    return utils.HParams(**spec)