def Task(self):
  """Returns task params for the WMT'14 En->De base Transformer.

  Hyperparameters follow the "base" configuration of Vaswani et al. (2017):
  model_dim=512, ffn hidden_dim=2048, 8 heads, 6 layers, dropout 0.1.
  """
  task_params = base_config.SetupTransformerParams(
      model.TransformerModel.Params(),
      name='wmt14_en_de_transformer_base',
      vocab_size=self.VOCAB_SIZE,
      model_dim=512,
      hidden_dim=2048,
      num_heads=8,
      num_layers=6,
      residual_dropout_prob=0.1,
      input_dropout_prob=0.1,
      learning_rate=3.0,
      warmup_steps=40000)
  # Number of eval samples aggregated into each eval summary.
  task_params.eval.samples_per_summary = 7500
  return task_params
def Task(self):
  """Returns task params for a small, fast-training Transformer.

  A scaled-down variant (model_dim=256, 2 heads, 2 layers, higher dropout)
  with frequent checkpointing and a 12k-step cap — suitable for quick
  experiments rather than full-quality training.
  """
  task_params = base_config.SetupTransformerParams(
      model.TransformerModel.Params(),
      name='wmt14_en_de_transformer_base',
      vocab_size=self.VOCAB_SIZE,
      model_dim=256,
      hidden_dim=512,
      num_heads=2,
      num_layers=2,
      residual_dropout_prob=0.2,
      input_dropout_prob=0.2,
      learning_rate=1.0,
      warmup_steps=1000)
  # Number of eval samples aggregated into each eval summary.
  task_params.eval.samples_per_summary = 7500
  # Checkpoint every minute and stop early so runs stay short.
  task_params.train.save_interval_seconds = 60
  task_params.train.max_steps = 12000
  return task_params
def Task(self):
  """Builds params for the FeatureNeighborhood Transformer task.

  Configures a FeatureNeighborhoodModelTrans from FLAGS and the instance's
  `self._*` experiment settings: loads input/output symbol tables, applies the
  standard Transformer setup, adjusts the decoder for TPU when requested, and
  optionally adds neighbor spelling/pronunciation encoders plus a neighbor-id
  embedding.

  Returns:
    A fully configured FeatureNeighborhoodModelTrans.Params instance.
  """
  p = feature_neighborhood_model_trans.FeatureNeighborhoodModelTrans.Params()
  # With shared embeddings the output side reuses the input symbol table;
  # otherwise each side loads its own table.
  if self._share_embeddings:
    output_symbol_path = FLAGS.input_symbols
  else:
    output_symbol_path = FLAGS.output_symbols
  # First tuple element (the input config) is unused here; only the symbol
  # tables are kept on the params.
  _, p.input_symbols, p.output_symbols = (
      fn.FeatureNeighborhoodInput.ParameterizedConfigs(
          input_symbol_path=FLAGS.input_symbols,
          output_symbol_path=output_symbol_path,
          append_eos=FLAGS.append_eos,
          max_spelling_len=FLAGS.max_spelling_len,
          max_pronunciation_len=FLAGS.max_pronunciation_len,
          max_neighbors=FLAGS.max_neighbors))
  p.input_vocab_size = p.input_symbols.num_symbols()
  p.output_vocab_size = p.output_symbols.num_symbols()
  p.max_neighbors = FLAGS.max_neighbors
  p.max_pronunciation_len = FLAGS.max_pronunciation_len
  p.max_spelling_len = FLAGS.max_spelling_len
  # Id of the sentence-start symbol in the output table.
  p.start = p.output_symbols.find("<s>")
  p.share_embeddings = self._share_embeddings
  # The Transformer's single vocab: input table when shared, output otherwise
  # (the encoder's embedding is re-pointed below in the non-shared case).
  if self._share_embeddings:
    vocab_size = p.input_vocab_size
  else:
    vocab_size = p.output_vocab_size
  p = base_config.SetupTransformerParams(
      p,
      name="feature_neighborhood_with_neighbors",
      vocab_size=vocab_size,
      model_dim=p.embedding_dim,
      hidden_dim=p.enc_units,
      num_heads=self._num_heads,
      num_layers=self._num_layers,
      learning_rate=3.0,
      warmup_steps=40000,
      residual_dropout_prob=self._residual_dropout_prob,
      relu_dropout_prob=self._relu_dropout_prob,
      input_dropout_prob=self._input_dropout_prob,
      atten_dropout_prob=self._atten_dropout_prob,
      label_smoothing_uncertainty=self._label_smoothing_uncertainty)
  if not self._share_embeddings:
    # Encoder embeds input symbols while the rest of the model (decoder,
    # softmax) uses the output vocab configured above.
    p.encoder.token_emb.vocab_size = p.input_vocab_size
  p.eval.samples_per_summary = 20000
  # TODO(llion): Might need to change the output vocab size to one that can
  # be sharded to run efficiently on TPUs.
  p.decoder.softmax.num_shards = 1
  p.decoder.target_seq_len = p.max_pronunciation_len
  if py_utils.use_tpu():
    p.decoder.beam_search = model_helper.ChangeToBeamSearchTpuHelper(
        p.decoder.beam_search)
  if FLAGS.neigh_use_tpu:
    # Swap embeddings/softmax for TPU-friendly variants.
    for pp in [p.encoder, p.decoder]:
      pp.token_emb = model_helper.ChangeToSimpleEmbedding(pp.token_emb)
    p.decoder.softmax = model_helper.ChangeToSimpleSoftmax(p.decoder.softmax)
  p.use_neighbors = self._use_neighbors
  if self._use_neighbors:
    # Separate Transformer encoder over neighbor spellings.
    p.spell_encoder = base_config.SetupTransformerEncoder(
        vocab_size=p.input_vocab_size,
        model_dim=p.embedding_dim,
        hidden_dim=p.enc_units,
        num_heads=self._num_heads,
        num_layers=self._num_layers,
        residual_dropout_prob=self._residual_dropout_prob,
        relu_dropout_prob=self._relu_dropout_prob,
        input_dropout_prob=self._input_dropout_prob,
        atten_dropout_prob=self._atten_dropout_prob)
    # CONCATAVE concatenates spelling and pronunciation through a single
    # encoder, so no separate pronunciation encoder is built for it.
    if self._attention_type != "CONCATAVE":
      p.pron_encoder = base_config.SetupTransformerEncoder(
          vocab_size=p.output_vocab_size,
          model_dim=p.embedding_dim,
          hidden_dim=p.enc_units,
          num_heads=self._num_heads,
          num_layers=self._num_layers,
          residual_dropout_prob=self._residual_dropout_prob,
          relu_dropout_prob=self._relu_dropout_prob,
          input_dropout_prob=self._input_dropout_prob,
          atten_dropout_prob=self._atten_dropout_prob)
    else:
      if not self._share_embeddings:
        raise ValueError("Must share embeddings to concat spelling and pron.")
    if FLAGS.neigh_use_tpu:
      # pron_encoder may be unset (CONCATAVE case) — hence the `if pp` guard.
      for pp in [p.spell_encoder, p.pron_encoder]:
        if pp:
          pp.token_emb = model_helper.ChangeToSimpleEmbedding(pp.token_emb)
  p.also_shuffle_neighbors = self._also_shuffle_neighbors
  if self._use_neigh_id_emb:
    # Neighbor-id embeddings only make sense when neighbors are used at all.
    assert self._use_neighbors
    p.use_neigh_id_emb = True
    # NOTE(review): `maths.sqrt` below — confirm the file really imports the
    # math module under the alias `maths`; the stdlib name is `math`.
    if self._attention_type == "CONCAT":
      # Main input is concatenated alongside the neighbors, so the id
      # embedding needs one extra entry.
      neigh_id_emb = layers.EmbeddingLayer.Params().Set(
          vocab_size=FLAGS.max_neighbors + 1,  # +1 to include the main input
          embedding_dim=p.embedding_dim,
          max_num_shards=1,
          params_init=py_utils.WeightInit.Gaussian(
              1.0 / maths.sqrt(p.embedding_dim)),
          scale_sqrt_depth=True)
      p.encoder.task_emb = neigh_id_emb
    elif self._attention_type == "AVERAGE":
      neigh_id_emb = layers.EmbeddingLayer.Params().Set(
          vocab_size=FLAGS.max_neighbors,
          embedding_dim=p.embedding_dim,
          max_num_shards=1,
          params_init=py_utils.WeightInit.Gaussian(
              1.0 / maths.sqrt(p.embedding_dim)),
          scale_sqrt_depth=True)
      # Both neighbor encoders share the same id embedding params.
      p.spell_encoder.task_emb = neigh_id_emb
      p.pron_encoder.task_emb = neigh_id_emb
  p.neigh_att_type = self._attention_type
  p.aux_dropout_prob = self._aux_dropout_prob
  return p