from tensor2tensor.models import lstm


def chatbot_lstm_attn():
  """LSTM-with-attention hparams for the chatbot task."""
  hparams = lstm.lstm_attention()
  hparams.max_length = 256
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.2
  hparams.learning_rate_warmup_steps = 8000
  hparams.initializer_gain = 1.0
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.layer_preprocess_sequence = "n"
  hparams.layer_postprocess_sequence = "da"
  hparams.layer_prepostprocess_dropout = 0.1
  hparams.hidden_size = 1024
  hparams.num_hidden_layers = 2
  hparams.attn_vec_size = 128
  hparams.batch_size = 4096
  return hparams
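# Sets like the one above are normally exposed through the tensor2tensor
# registry so the trainer can select them with --hparams_set=chatbot_lstm_attn.
# A minimal sketch of that wiring (the registry import path is the standard
# t2t one; the snippet above omits the registration, so this is an assumed
# addition, applied as a plain call to leave the definition untouched):
from tensor2tensor.utils import registry

chatbot_lstm_attn = registry.register_hparams(chatbot_lstm_attn)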
def testLSTMSeq2SeqAttention(self):
  vocab_size = 9
  # randint's upper bound is exclusive, so this draws ids in
  # [1, vocab_size - 1], keeping id 0 free for padding.
  x = np.random.randint(1, high=vocab_size, size=(3, 5, 1, 1))
  y = np.random.randint(1, high=vocab_size, size=(3, 6, 1, 1))
  hparams = lstm.lstm_attention()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
  x = tf.constant(x, dtype=tf.int32)
  # Re-wrap the constant so the graph is built with dynamic dimensions.
  x = tf.placeholder_with_default(x, shape=[None, None, 1, 1])
  with self.test_session() as session:
    features = {
        "inputs": x,
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = lstm.LSTMSeq2seqAttention(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
    self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
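# The test methods in this section assume the boilerplate of a
# tf.test.TestCase subclass, which is what provides self.test_session().
# A minimal harness with the imports the snippets rely on (module paths
# follow the tensor2tensor layout):
import numpy as np
import tensorflow as tf

from tensor2tensor.data_generators import problem_hparams
from tensor2tensor.models import lstm


class LSTMTest(tf.test.TestCase):
  # testLSTMSeq2SeqAttention and
  # testLSTMSeq2seqAttentionBidirectionalEncoder go here.
  pass


if __name__ == "__main__":
  tf.test.main()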
def lstm_attention_think_lr3():
  hparams = lstm_attention()
  hparams.learning_rate_constant = 0.001
  hparams.learning_rate_schedule = "constant"
  hparams.num_hidden_layers = 1
  hparams.hidden_size = 512
  hparams.add_hparam("eval_throttle_seconds", 100)
  return hparams


def gru_attention_scan():
  hparams = lstm_attention()
  hparams.learning_rate_constant = 0.001
  hparams.learning_rate_schedule = "constant"
  hparams.add_hparam("stack_size", 10)
  hparams.add_hparam("num_stacks", 10)
  hparams.add_hparam("decoder_type", DECODER_TYPE)  # module-level constant
  hparams.num_hidden_layers = 1
  hparams.hidden_size = 50
  hparams.dropout = 0.5
  hparams.add_hparam("eval_throttle_seconds", 100)
  return hparams
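# gru_attention_scan mixes two ways of setting hyperparameters: keys that
# already exist on the base lstm_attention() set (num_hidden_layers,
# hidden_size, dropout) are overridden by plain assignment, while new keys
# (stack_size, num_stacks, decoder_type, eval_throttle_seconds) must be
# introduced via add_hparam, which raises if the name is already taken.
# A small sketch of that contract (TF1-era tf.contrib.training.HParams
# assumed, which is what these hparams objects are):
from tensorflow.contrib.training import HParams

hp = HParams(hidden_size=128)
hp.hidden_size = 256                # existing key: plain assignment
hp.add_hparam("stack_size", 10)     # new key: must be added explicitly
# hp.add_hparam("hidden_size", 64)  # would raise ValueError: name exists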
def testLSTMSeq2seqAttentionBidirectionalEncoder(self):
  vocab_size = 9
  x = np.random.randint(1, high=vocab_size, size=(3, 5, 1, 1))
  y = np.random.randint(1, high=vocab_size, size=(3, 6, 1, 1))
  hparams = lstm.lstm_attention()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
  x = tf.constant(x, dtype=tf.int32)
  x = tf.placeholder_with_default(x, shape=[None, None, 1, 1])
  with self.test_session() as session:
    features = {
        "inputs": x,
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = lstm.LSTMSeq2seqAttentionBidirectionalEncoder(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
    self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
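# Both attention tests wrap the constant input in tf.placeholder_with_default
# so the graph is traced with dynamic batch and time dimensions while still
# having concrete values to feed at run time. In isolation (TF1 graph mode):
import tensorflow as tf

x = tf.constant([[1, 2, 3]])  # static shape (1, 3)
x = tf.placeholder_with_default(x, shape=[None, None])
print(x.shape)  # (?, ?) -- both dims are now unknown at graph-build time
with tf.Session() as sess:
  print(sess.run(x))  # [[1 2 3]]: the default constant flows through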
def semeval_lstm():
  hparams = lstm_attention()
  hparams.batch_size = 512
  hparams.num_heads = 1
  return hparams


def dstc_bilstm_attention_hparams_v4():
  hparams = lstm_attention()
  hparams.num_hidden_layers = 4
  hparams.batch_size = 4096
  hparams.dropout = 0.7
  return hparams


def dstc_lstm_attention_hparams_v1():
  hparams = lstm_attention()
  hparams.num_hidden_layers = 2
  hparams.batch_size = 4096
  hparams.dropout = 0.9
  return hparams