예제 #1
0
 def Task(self):
     """Return the task params named 'wmt14_en_de_transformer_base'.

     The params are produced by `base_config.SetupTransformerParams` applied
     to `model.TransformerModel.Params()` with the hyperparameters listed
     below; `eval.samples_per_summary` is then set to 7500.

     Returns:
       The configured params object.
     """
     base_params = model.TransformerModel.Params()
     # All keyword arguments forwarded to SetupTransformerParams.
     hparams = dict(
         name='wmt14_en_de_transformer_base',
         vocab_size=self.VOCAB_SIZE,
         model_dim=512,
         hidden_dim=2048,
         num_heads=8,
         num_layers=6,
         residual_dropout_prob=0.1,
         input_dropout_prob=0.1,
         learning_rate=3.0,
         warmup_steps=40000,
     )
     task_params = base_config.SetupTransformerParams(base_params, **hparams)
     task_params.eval.samples_per_summary = 7500
     return task_params
예제 #2
0
 def Task(self):
     """Return task params for a 2-layer, 2-head, 256-dim configuration.

     The params are produced by `base_config.SetupTransformerParams` applied
     to `model.TransformerModel.Params()`. Afterwards the eval summary sample
     count, the checkpoint save interval (60 seconds) and the maximum number
     of training steps (12000) are set on the result.

     Returns:
       The configured params object.
     """
     base_params = model.TransformerModel.Params()
     # All keyword arguments forwarded to SetupTransformerParams.
     hparams = dict(
         name='wmt14_en_de_transformer_base',
         vocab_size=self.VOCAB_SIZE,
         model_dim=256,
         hidden_dim=512,
         num_heads=2,
         num_layers=2,
         residual_dropout_prob=0.2,
         input_dropout_prob=0.2,
         learning_rate=1.0,
         warmup_steps=1000,
     )
     task_params = base_config.SetupTransformerParams(base_params, **hparams)

     task_params.eval.samples_per_summary = 7500

     train_params = task_params.train
     train_params.save_interval_seconds = 60
     train_params.max_steps = 12000
     return task_params
예제 #3
0
  def Task(self):
    """Builds the params for the feature-neighborhood Transformer task.

    Symbol-table paths, length limits and the TPU switch are read from
    `FLAGS`; model options (head/layer counts, dropout probabilities,
    attention type, neighbor handling) are read from this object's private
    attributes.

    Returns:
      The `FeatureNeighborhoodModelTrans` params after
      `base_config.SetupTransformerParams` and the neighbor-specific settings
      have been applied.

    Raises:
      ValueError: If neighbors are used with attention type "CONCATAVE" while
        embeddings are not shared.
    """
    # Imported locally so the embedding initializer below relies on the
    # standard library `math` module rather than on a module-level name.
    import math

    p = feature_neighborhood_model_trans.FeatureNeighborhoodModelTrans.Params()

    # With shared embeddings the output side is configured from the input
    # symbol file as well.
    output_symbol_path = (
        FLAGS.input_symbols if self._share_embeddings
        else FLAGS.output_symbols)
    _, p.input_symbols, p.output_symbols = (
        fn.FeatureNeighborhoodInput.ParameterizedConfigs(
            input_symbol_path=FLAGS.input_symbols,
            output_symbol_path=output_symbol_path,
            append_eos=FLAGS.append_eos,
            max_spelling_len=FLAGS.max_spelling_len,
            max_pronunciation_len=FLAGS.max_pronunciation_len,
            max_neighbors=FLAGS.max_neighbors))
    p.input_vocab_size = p.input_symbols.num_symbols()
    p.output_vocab_size = p.output_symbols.num_symbols()
    p.max_neighbors = FLAGS.max_neighbors
    p.max_pronunciation_len = FLAGS.max_pronunciation_len
    p.max_spelling_len = FLAGS.max_spelling_len
    p.start = p.output_symbols.find("<s>")
    p.share_embeddings = self._share_embeddings

    vocab_size = (
        p.input_vocab_size if self._share_embeddings
        else p.output_vocab_size)

    p = base_config.SetupTransformerParams(
        p,
        name="feature_neighborhood_with_neighbors",
        vocab_size=vocab_size,
        model_dim=p.embedding_dim,
        hidden_dim=p.enc_units,
        num_heads=self._num_heads,
        num_layers=self._num_layers,
        learning_rate=3.0,
        warmup_steps=40000,
        residual_dropout_prob=self._residual_dropout_prob,
        relu_dropout_prob=self._relu_dropout_prob,
        input_dropout_prob=self._input_dropout_prob,
        atten_dropout_prob=self._atten_dropout_prob,
        label_smoothing_uncertainty=self._label_smoothing_uncertainty)
    if not self._share_embeddings:
      # The setup call above was given the output vocab size, so the encoder
      # token embedding is pointed at the input vocab size here.
      p.encoder.token_emb.vocab_size = p.input_vocab_size
    p.eval.samples_per_summary = 20000
    # TODO(llion): Might need to change the output vocab size to one that can
    # be sharded to run efficiently on TPUs.
    p.decoder.softmax.num_shards = 1
    p.decoder.target_seq_len = p.max_pronunciation_len

    if py_utils.use_tpu():
      p.decoder.beam_search = model_helper.ChangeToBeamSearchTpuHelper(
          p.decoder.beam_search)

    if FLAGS.neigh_use_tpu:
      for pp in [p.encoder, p.decoder]:
        pp.token_emb = model_helper.ChangeToSimpleEmbedding(pp.token_emb)
      p.decoder.softmax = model_helper.ChangeToSimpleSoftmax(p.decoder.softmax)

    p.use_neighbors = self._use_neighbors
    if self._use_neighbors:
      # Settings shared by the spelling and pronunciation encoders; only the
      # vocabulary size differs between the two.
      neighbor_encoder_kwargs = dict(
          model_dim=p.embedding_dim,
          hidden_dim=p.enc_units,
          num_heads=self._num_heads,
          num_layers=self._num_layers,
          residual_dropout_prob=self._residual_dropout_prob,
          relu_dropout_prob=self._relu_dropout_prob,
          input_dropout_prob=self._input_dropout_prob,
          atten_dropout_prob=self._atten_dropout_prob)
      p.spell_encoder = base_config.SetupTransformerEncoder(
          vocab_size=p.input_vocab_size, **neighbor_encoder_kwargs)
      if self._attention_type != "CONCATAVE":
        p.pron_encoder = base_config.SetupTransformerEncoder(
            vocab_size=p.output_vocab_size, **neighbor_encoder_kwargs)
      elif not self._share_embeddings:
        raise ValueError("Must share embeddings to concat spelling and pron.")
      if FLAGS.neigh_use_tpu:
        # `p.pron_encoder` is not assigned above for "CONCATAVE"; the
        # truthiness check skips it in that case (presumably it is unset by
        # default -- confirm against the model's Params definition).
        for pp in [p.spell_encoder, p.pron_encoder]:
          if pp:
            pp.token_emb = model_helper.ChangeToSimpleEmbedding(pp.token_emb)

    p.also_shuffle_neighbors = self._also_shuffle_neighbors
    if self._use_neigh_id_emb:
      assert self._use_neighbors
      p.use_neigh_id_emb = True

      def _NeighIdEmbParams(num_ids):
        """Returns embedding params with `num_ids` entries."""
        return layers.EmbeddingLayer.Params().Set(
            vocab_size=num_ids,
            embedding_dim=p.embedding_dim,
            max_num_shards=1,
            params_init=py_utils.WeightInit.Gaussian(
                1.0 / math.sqrt(p.embedding_dim)),
            scale_sqrt_depth=True)

      if self._attention_type == "CONCAT":
        # +1 to include the main input
        p.encoder.task_emb = _NeighIdEmbParams(FLAGS.max_neighbors + 1)
      elif self._attention_type == "AVERAGE":
        # The same params object is assigned to both neighbor encoders.
        neigh_id_emb = _NeighIdEmbParams(FLAGS.max_neighbors)
        p.spell_encoder.task_emb = neigh_id_emb
        p.pron_encoder.task_emb = neigh_id_emb

    p.neigh_att_type = self._attention_type
    p.aux_dropout_prob = self._aux_dropout_prob

    return p