def chatbot_lstm_batch_8k():
  hparams = lstm.lstm_seq2seq()
  hparams.max_length = 256
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.2
  hparams.learning_rate_warmup_steps = 8000
  hparams.initializer_gain = 1.0
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.layer_preprocess_sequence = "n"
  hparams.layer_postprocess_sequence = "da"
  hparams.layer_prepostprocess_dropout = 0.1
  hparams.symbol_modality_num_shards = 50
  hparams.hidden_size = 256
  hparams.num_hidden_layers = 2
  hparams.batch_size = 8192
  return hparams

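# Rough sketch (an assumption, not taken from the library source) of the
# "noam" decay scheme selected above: the rate warms up roughly linearly for
# `warmup_steps` and then decays as 1/sqrt(step), scaled by hidden_size**-0.5
# as in the Transformer paper.  Constants and names here are illustrative.
def noam_learning_rate(step, hidden_size=256, warmup_steps=8000, base_rate=0.2):
  """Illustrative noam schedule; the library's exact scaling may differ."""
  step = max(float(step), 1.0)
  return base_rate * (hidden_size ** -0.5) * min(
      step * warmup_steps ** -1.5, step ** -0.5)
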
def __init__(self, vocab_size, latent_dim, train_flag):
  self.hparams = lstm.lstm_seq2seq()
  self.hparams.num_hidden_layers = 1
  self.vocabsz = vocab_size
  self.train_flag = train_flag  # True or False
  # Target-side embedding table, initialized uniformly in [-1.0, 1.0).
  self.embeddings_y = tf.Variable(
      tf.random_uniform([vocab_size, latent_dim], -1.0, 1.0))

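# Usage sketch (an assumption, not part of the original class): an embedding
# table like `embeddings_y` above is typically consumed with
# tf.nn.embedding_lookup.  The names `embeddings_y` and `target_ids` below
# are illustrative only.
def embed_targets(embeddings_y, target_ids):
  """Looks up [batch, time] int32 ids in a [vocab_size, latent_dim] table."""
  return tf.nn.embedding_lookup(embeddings_y, target_ids)
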
def chatbot_lstm_batch_8k():
  hparams = lstm.lstm_seq2seq()
  hparams.clip_grad_norm = 0.0
  hparams.shared_embedding_and_softmax_weights = True
  hparams.optimizer = "Adafactor"
  hparams.use_fixed_batch_size = False
  hparams.summarize_vars = True
  hparams.symbol_modality_num_shards = 10
  hparams.hidden_size = 2048
  hparams.num_hidden_layers = 2
  hparams.batch_size = 8192
  hparams.max_length = 64
  return hparams

def general_gradient_checkpointed_seq2seq_hparams():
  hparams = lstm.lstm_seq2seq()
  hparams.clip_grad_norm = 0.0
  hparams.shared_embedding_and_softmax_weights = SEQ2SEQ_HPARAMS[
      "shared_embedding_and_softmax_weights"]
  hparams.optimizer = SEQ2SEQ_HPARAMS["optimizer"]
  hparams.use_fixed_batch_size = SEQ2SEQ_HPARAMS["fixed_batch_size"]
  hparams.summarize_vars = SEQ2SEQ_HPARAMS["summarize_vars"]
  hparams.symbol_modality_num_shards = SEQ2SEQ_HPARAMS["embed_num_shards"]
  hparams.hidden_size = SEQ2SEQ_HPARAMS["embedding_size"]
  hparams.num_hidden_layers = SEQ2SEQ_HPARAMS["num_layers"]
  hparams.batch_size = SEQ2SEQ_HPARAMS["batch_size"]
  hparams.max_length = SEQ2SEQ_HPARAMS["max_sentence_len"]
  return hparams

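# SEQ2SEQ_HPARAMS is referenced above but not defined in this snippet.  A
# hypothetical dictionary carrying the keys the function reads might look
# like the following; the values are illustrative only (they mirror the
# literal chatbot_lstm_batch_8k settings above) and are not from the source.
SEQ2SEQ_HPARAMS_EXAMPLE = {
    "shared_embedding_and_softmax_weights": True,
    "optimizer": "Adafactor",
    "fixed_batch_size": False,
    "summarize_vars": True,
    "embed_num_shards": 10,
    "embedding_size": 2048,
    "num_layers": 2,
    "batch_size": 8192,
    "max_sentence_len": 64,
}
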
def testLSTMSeq2seqBidirectionalEncoder(self):
  vocab_size = 9
  x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1))
  y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1))
  hparams = lstm.lstm_seq2seq()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = lstm.LSTMSeq2seqBidirectionalEncoder(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))

def testLSTMSeq2seqBidirectionalEncoder(self):
  vocab_size = 9
  x = np.random.randint(1, high=vocab_size, size=(3, 5, 1, 1))
  y = np.random.randint(1, high=vocab_size, size=(3, 6, 1, 1))
  hparams = lstm.lstm_seq2seq()
  p_hparams = problem_hparams.test_problem_hparams(
      vocab_size, vocab_size, hparams)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = lstm.LSTMSeq2seqBidirectionalEncoder(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))

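# The two test methods above assume the usual Tensor2Tensor test scaffolding;
# a minimal sketch of the surrounding module (imports, test class, and entry
# point) under that assumption.  The class name is illustrative.
import numpy as np
import tensorflow as tf

from tensor2tensor.data_generators import problem_hparams
from tensor2tensor.models import lstm


class LSTMTest(tf.test.TestCase):
  # The test methods shown above would live in this class.
  pass


if __name__ == "__main__":
  tf.test.main()
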
def sigo_hp_basic():
  hparams = lstm_seq2seq()
  hparams.max_target_seq_length = 0
  hparams.max_input_seq_length = 0
  hparams.batch_size = 1024
  hparams.learning_rate_constant = 0.001
  hparams.learning_rate_schedule = "constant"
  hparams.dropout = 0.2
  hparams.num_hidden_layers = NUM_HIDDEN_LAYERS
  hparams.initializer = "uniform_unit_scaling"
  hparams.clip_grad_norm = 2.0
  hparams.hidden_size = HIDDEN_SIZE
  hparams.add_hparam("encoder_type", ENCODER_TYPE)
  hparams.add_hparam("decoder_type", DECODER_TYPE)
  hparams.add_hparam("list_size", LIST_SIZE)
  hparams.add_hparam("num_lists", NUM_LISTS)
  hparams.add_hparam("filter_sizes", [2, 3, 4, 5])
  hparams.add_hparam("num_filters", 128)
  # Targeted dropout for RESNET
  hparams.add_hparam("use_td", False)
  hparams.add_hparam("targeting_rate", None)
  hparams.add_hparam("keep_prob", None)
  # for transformer encoder
  hparams.add_hparam("proximity_bias", False)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 0)
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("num_heads", 4)
  hparams.add_hparam("use_pad_remover", True)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  hparams.add_hparam("self_attention_type", "dot_product")
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("filter_size", 64)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "")
  return hparams

def chatbot_lstm_batch_8k():
  hparams = lstm.lstm_seq2seq()
  hparams.hidden_size = 4096
  hparams.num_hidden_layers = 2
  hparams.batch_size = 8192
  return hparams

def dstc_bilstm_hparams_v4():
  hparams = lstm_seq2seq()
  hparams.num_hidden_layers = 4
  hparams.batch_size = 4096
  hparams.dropout = 0.7
  return hparams

def dstc_lstm_hparams_v1():
  hparams = lstm_seq2seq()
  hparams.num_hidden_layers = 2
  hparams.batch_size = 4096
  hparams.dropout = 0.9
  return hparams

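# Usage sketch (an assumption about the surrounding project): Tensor2Tensor
# hparams sets like the ones in this file are normally exposed through the
# registry so the trainer can select them by name, e.g.
# `t2t-trainer --model=lstm_seq2seq --hparams_set=dstc_lstm_hparams_v1_registered`.
# The `_registered` name below is illustrative and simply mirrors the
# settings of dstc_lstm_hparams_v1 above.
from tensor2tensor.models import lstm
from tensor2tensor.utils import registry


@registry.register_hparams
def dstc_lstm_hparams_v1_registered():
  """Same settings as dstc_lstm_hparams_v1, registered by name."""
  hparams = lstm.lstm_seq2seq()
  hparams.num_hidden_layers = 2
  hparams.batch_size = 4096
  hparams.dropout = 0.9
  return hparams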