Example #1
def chatbot_lstm_batch_8k():
    hparams = lstm.lstm_seq2seq()
    hparams.max_length = 256
    hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
    hparams.optimizer_adam_epsilon = 1e-9
    hparams.learning_rate_decay_scheme = "noam"
    hparams.initializer_gain = 1.0
    hparams.initializer = "uniform_unit_scaling"
    hparams.weight_decay = 0.0
    hparams.optimizer_adam_beta1 = 0.9
    hparams.optimizer_adam_beta2 = 0.98
    hparams.num_sampled_classes = 0
    hparams.label_smoothing = 0.1
    hparams.learning_rate_warmup_steps = 8000
    hparams.learning_rate = 0.2
    hparams.layer_preprocess_sequence = "n"
    hparams.layer_postprocess_sequence = "da"
    hparams.layer_prepostprocess_dropout = 0.1

    hparams.symbol_modality_num_shards = 50
    hparams.hidden_size = 256
    hparams.num_hidden_layers = 2
    hparams.batch_size = 8192
    return hparams
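A set of hparams like the one above is normally exposed through tensor2tensor's registry so it can be selected by name when training. A minimal sketch of that pattern, assuming the standard tensor2tensor registry API; the decorator and the trainer flag are not part of the example above:

from tensor2tensor.models import lstm
from tensor2tensor.utils import registry


@registry.register_hparams
def chatbot_lstm_batch_8k():
    # Start from the stock LSTM seq2seq defaults and override as in Example #1.
    hparams = lstm.lstm_seq2seq()
    hparams.batch_size = 8192
    return hparams

# The registered name can then be selected at training time, e.g.:
#   t2t-trainer --model=lstm_seq2seq --hparams_set=chatbot_lstm_batch_8k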
Example #2
def __init__(self, vocab_size, latent_dim, train_flag):
    self.hparams = lstm.lstm_seq2seq()
    self.hparams.num_hidden_layers = 1
    self.vocabsz = vocab_size
    self.train_flag = train_flag  # True or False
    self.embeddings_y = tf.Variable(
        tf.random_uniform([vocab_size, latent_dim], -1.0, 1.0))
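The constructor fragment above only builds the embedding matrix. A typical next step is an embedding lookup on integer target ids; the method below is a hypothetical continuation (its name and argument are assumptions, not part of the original class):

def embed_targets(self, y_ids):
    # Map integer target ids to their latent_dim-sized embedding vectors.
    return tf.nn.embedding_lookup(self.embeddings_y, y_ids)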
Example #3
def chatbot_lstm_batch_8k():
  hparams = lstm.lstm_seq2seq()

  hparams.clip_grad_norm = 0.0
  hparams.shared_embedding_and_softmax_weights = True
  hparams.optimizer = "Adafactor"
  hparams.use_fixed_batch_size = False
  hparams.summarize_vars = True

  hparams.symbol_modality_num_shards = 10
  hparams.hidden_size = 2048
  hparams.num_hidden_layers = 2
  hparams.batch_size = 8192
  hparams.max_length = 64
  return hparams
Example #4
def general_gradient_checkpointed_seq2seq_hparams():
  hparams = lstm.lstm_seq2seq()

  hparams.clip_grad_norm = 0.0
  hparams.shared_embedding_and_softmax_weights = SEQ2SEQ_HPARAMS[
      "shared_embedding_and_softmax_weights"]
  hparams.optimizer = SEQ2SEQ_HPARAMS["optimizer"]
  hparams.use_fixed_batch_size = SEQ2SEQ_HPARAMS["fixed_batch_size"]
  hparams.summarize_vars = SEQ2SEQ_HPARAMS["summarize_vars"]

  hparams.symbol_modality_num_shards = SEQ2SEQ_HPARAMS["embed_num_shards"]
  hparams.hidden_size = SEQ2SEQ_HPARAMS["embedding_size"]
  hparams.num_hidden_layers = SEQ2SEQ_HPARAMS["num_layers"]
  hparams.batch_size = SEQ2SEQ_HPARAMS["batch_size"]
  hparams.max_length = SEQ2SEQ_HPARAMS["max_sentence_len"]
  return hparams
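SEQ2SEQ_HPARAMS in Example #4 is a module-level dict that is not shown in the snippet. Purely to illustrate its expected shape, inferred from the keys read above, it could look like this (the values are assumptions that simply mirror the hard-coded ones in Example #3):

SEQ2SEQ_HPARAMS = {
    "shared_embedding_and_softmax_weights": True,
    "optimizer": "Adafactor",
    "fixed_batch_size": False,
    "summarize_vars": True,
    "embed_num_shards": 10,
    "embedding_size": 2048,
    "num_layers": 2,
    "batch_size": 8192,
    "max_sentence_len": 64,
}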
Example #5
def testLSTMSeq2seqBidirectionalEncoder(self):
  vocab_size = 9
  # Random token ids in [1, vocab_size - 1].
  x = np.random.randint(1, high=vocab_size, size=(3, 5, 1, 1))
  y = np.random.randint(1, high=vocab_size, size=(3, 6, 1, 1))
  hparams = lstm.lstm_seq2seq()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = lstm.LSTMSeq2seqBidirectionalEncoder(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
Example #6
def testLSTMSeq2seqBidirectionalEncoder(self):
    vocab_size = 9
    x = np.random.randint(1, high=vocab_size, size=(3, 5, 1, 1))
    y = np.random.randint(1, high=vocab_size, size=(3, 6, 1, 1))
    hparams = lstm.lstm_seq2seq()
    p_hparams = problem_hparams.test_problem_hparams(
        vocab_size, vocab_size, hparams)
    with self.test_session() as session:
        features = {
            "inputs": tf.constant(x, dtype=tf.int32),
            "targets": tf.constant(y, dtype=tf.int32),
        }
        model = lstm.LSTMSeq2seqBidirectionalEncoder(
            hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
        logits, _ = model(features)
        session.run(tf.global_variables_initializer())
        res = session.run(logits)
    self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
Example #7
def sigo_hp_basic():
    hparams = lstm_seq2seq()
    hparams.max_target_seq_length = 0
    hparams.max_input_seq_length = 0
    hparams.batch_size = 1024
    hparams.learning_rate_constant = 0.001
    hparams.learning_rate_schedule = "constant"
    hparams.dropout = 0.2
    hparams.num_hidden_layers = NUM_HIDDEN_LAYERS
    hparams.initializer = "uniform_unit_scaling"
    hparams.clip_grad_norm = 2.0
    hparams.hidden_size = HIDDEN_SIZE
    hparams.add_hparam("encoder_type", ENCODER_TYPE)
    hparams.add_hparam("decoder_type", DECODER_TYPE)
    hparams.add_hparam("list_size", LIST_SIZE)
    hparams.add_hparam("num_lists", NUM_LISTS)
    hparams.add_hparam("filter_sizes", [2, 3, 4, 5])
    hparams.add_hparam("num_filters", 128)

    # Targeted dropout for ResNet
    hparams.add_hparam("use_td", False)
    hparams.add_hparam("targeting_rate", None)
    hparams.add_hparam("keep_prob", None)

    # for transformer encoder
    hparams.add_hparam("proximity_bias", False)
    hparams.add_hparam("pos", "timing")  # timing, none
    hparams.add_hparam("num_encoder_layers", 0)
    hparams.add_hparam("num_decoder_layers", 0)
    hparams.add_hparam("attention_dropout", 0.0)
    hparams.add_hparam("num_heads", 4)
    hparams.add_hparam("use_pad_remover", True)
    hparams.add_hparam("attention_key_channels", 0)
    hparams.add_hparam("attention_value_channels", 0)
    hparams.add_hparam("self_attention_type", "dot_product")
    hparams.add_hparam("ffn_layer", "dense_relu_dense")
    hparams.add_hparam("filter_size", 64)
    hparams.add_hparam("relu_dropout", 0.0)
    hparams.add_hparam("relu_dropout_broadcast_dims", "")

    return hparams
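sigo_hp_basic mixes two idioms: plain attribute assignment to override keys that already exist in the base LSTM hparams set, and add_hparam to register keys that do not. With the standard TensorFlow/tensor2tensor HParams object, add_hparam raises an error when the name is already present, which is why the split above matters. A small illustrative sketch:

hparams = lstm_seq2seq()
hparams.hidden_size = 512           # override an hparam the base set already defines
hparams.add_hparam("num_heads", 4)  # register a key the base set does not have
# hparams.add_hparam("hidden_size", 512)  # would raise ValueError: the name already exists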
Example #8
def chatbot_lstm_batch_8k():
    hparams = lstm.lstm_seq2seq()
    hparams.hidden_size = 4096
    hparams.num_hidden_layers = 2
    hparams.batch_size = 8192
    return hparams
Example #9
def dstc_bilstm_hparams_v4():
    hparams = lstm_seq2seq()
    hparams.num_hidden_layers = 4
    hparams.batch_size = 4096
    hparams.dropout = 0.7
    return hparams
Example #10
def dstc_lstm_hparams_v1():
    hparams = lstm_seq2seq()
    hparams.num_hidden_layers = 2
    hparams.batch_size = 4096
    hparams.dropout = 0.9
    return hparams
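Variants like dstc_lstm_hparams_v1 and dstc_bilstm_hparams_v4 differ in only a value or two. When experimenting, an existing set can also be copied and tweaked without registering a new function for every change; a minimal sketch, assuming the function above and the HParams.parse override syntax:

def dstc_lstm_hparams_v1_light():
    # Reuse the registered set and override individual values in place.
    hparams = dstc_lstm_hparams_v1()
    hparams.parse("dropout=0.7,batch_size=2048")
    return hparams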