def transformer_tall_tpu():
  """HParams for the tall Transformer model on TPU, fine-tuned for
  twitter depression (td) classification."""
  hparams = transformer.transformer_tall_finetune_textclass()
  transformer.update_hparams_for_tpu(hparams)
  return hparams

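# For context, a minimal sketch (assumptions, not the library source) of the
# kind of adjustments transformer.update_hparams_for_tpu makes: TPUs favor
# memory-lean optimizers and statically shaped batches. The helper name below
# is hypothetical and for illustration only.
def _sketch_update_hparams_for_tpu(hparams):
  """Illustrative only: typical TPU-friendly hparam tweaks, applied in place."""
  hparams.optimizer = "Adafactor"      # uses less accelerator memory than Adam
  hparams.use_fixed_batch_size = True  # TPU compilation prefers static shapes
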
def transformer_l2_arctic_tpu():
  """HParams for training an ASR model on L2 Arctic on TPU."""
  hparams = transformer_l2_arctic()
  update_hparams_for_tpu(hparams)
  hparams.batch_size = 16
  # This caps inputs[1] * inputs[2] (max_input_seq_length * per-frame
  # feature size): 1650 * 80.
  hparams.max_length = 1650 * 80
  hparams.max_input_seq_length = 1650
  hparams.max_target_seq_length = 350
  return hparams

def transformer_tpu_td():
  """HParams for the Transformer model on TPU, fine-tuned for
  twitter depression (td) classification."""
  hparams = transformer.transformer_base()
  hparams.learning_rate = 0.025
  transformer.update_hparams_for_tpu(hparams)
  return hparams

def evolved_transformer_big_tpu_didd():
  """Big parameters for the Evolved Transformer model on TPU."""
  hparams = evolved_transformer_big_didd()
  transformer.update_hparams_for_tpu(hparams)
  hparams.max_length = 1024
  hparams.hidden_size = 1024
  hparams.num_heads = 16
  # Max filter_size fitting in 16G memory is 49152, at batch size 2.
  hparams.filter_size = 32768
  hparams.batch_size = 4
  hparams.multiproblem_vocab_size = 2**15
  return hparams

def iwslt_baseline_tpu():
  """HParams for the IWSLT baseline Transformer model on TPU."""
  hparams = transformer.transformer_base()
  transformer.update_hparams_for_tpu(hparams)
  hparams.hidden_size = 256
  hparams.filter_size = 1024
  hparams.num_hidden_layers = 5
  hparams.num_heads = 2
  hparams.layer_prepostprocess_dropout = 0.1
  hparams.attention_dropout = 0.1
  hparams.relu_dropout = 0.1
  hparams.dropout = 0.1
  hparams.add_hparam("pos_attn", False)
  return hparams

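# Note on add_hparam above (an assumption about the HParams API): "pos_attn"
# is not in the base hparams set, so it is registered with add_hparam() rather
# than plain attribute assignment, which would not track it as a
# hyperparameter. A quick check, with iwslt_baseline_tpu as defined above:
#   hparams = iwslt_baseline_tpu()
#   assert hparams.get("pos_attn") is False
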
def transformer_ae_base_tpu():
  """Base config adjusted for TPU."""
  hparams = transformer_ae_base()
  transformer.update_hparams_for_tpu(hparams)
  hparams.batch_size = 512
  return hparams

def adaptive_universal_transformer_global_base_tpu():
  """Base config for the adaptive Universal Transformer (global ACT) on TPU."""
  hparams = adaptive_universal_transformer_global_base()
  transformer.update_hparams_for_tpu(hparams)
  hparams.add_step_timing_signal = False
  return hparams

def universal_transformer_base_tpu():
  """Base config for the Universal Transformer on TPU."""
  hparams = universal_transformer_base()
  hparams = update_hparams_for_universal_transformer(hparams)
  transformer.update_hparams_for_tpu(hparams)
  hparams.add_step_timing_signal = False
  return hparams

def transformer_anime_chatbot_tpu():
  """HParams for the anime chatbot Transformer model on TPU."""
  hparams = transformer_anime_chatbot()
  transformer.update_hparams_for_tpu(hparams)
  return hparams

def wmt_enro_tpu():
  """HParams for the Transformer model on TPU for WMT En-Ro."""
  hparams = transformer.transformer_base()
  # update_hparams_for_tpu modifies hparams in place, so call it without
  # rebinding the result (as the other configs in this file do).
  transformer.update_hparams_for_tpu(hparams)
  hparams.batch_size = 512
  return hparams
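
# Usage sketch: assuming each hparams set above is registered with
# @registry.register_hparams (the tensor2tensor convention), it can be
# selected by name at training time. The problem name and paths below are
# placeholders, not values taken from this file:
#   t2t-trainer \
#     --model=transformer \
#     --hparams_set=wmt_enro_tpu \
#     --problem=translate_enro_wmt32k \
#     --use_tpu=True \
#     --data_dir=$DATA_DIR --output_dir=$OUTPUT_DIR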