import unittest

from lstm_dataset import S2SDataSet
from vocab_dict import get_dict


# Note: the original snippet showed only the testLoad method; the enclosing
# TestCase class name below is assumed for a runnable test module.
class S2SDataSetTest(unittest.TestCase):
    def testLoad(self):
        vdict, idict = get_dict()
        ds = S2SDataSet(vdict, idict, 'bobsue-data/bobsue.seq2seq.dev.tsv')
        for batch in ds.batches(30):
            # Each batch carries a (source, target) pair sharing the same batch dimension.
            self.assertEqual(2, len(batch.data))
            self.assertEqual(batch.data[0].shape[0], batch.data[1].shape[0])
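# Usage sketch (assumption: this TestCase lives in its own test module); running the
# file directly hands discovery and execution of testLoad to the standard unittest runner.
if __name__ == '__main__':
    unittest.main()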
import string
from collections import defaultdict

import vocab_dict
# RESERVED_TOKENS is assumed to be defined or imported elsewhere in this module.


def transformer_params():
    """Get default transformer params.

    Returns:
        Dict (a defaultdict; keys not set below fall back to None).
    """
    dic, vocab_size = vocab_dict.get_dict()
    viseme_dic = RESERVED_TOKENS + [
        'b', 'f', 'd', 'l', 'g', 'j', 'zh', 'z',
        'B', 'F', 'D', 'L', 'G', 'J', 'ZH', 'Z',
        'a', 'an', 'ao', 'o', 'ou', 'e', 'en', 'er',
        'i', 'u', 'v', 'i1', 'i2', ' ',
    ]
    pinyin_dic = RESERVED_TOKENS + list(string.ascii_lowercase) + [' ']
    return defaultdict(
        lambda: None,
        # Model params
        viseme_dic=viseme_dic,
        pinyin_dic=pinyin_dic,
        label_dic=dic,
        initializer_gain=1.0,  # Used in trainable variable initialization.
        hidden_size=512,  # Model dimension in the hidden layers.
        num_hidden_layers=3,  # Number of layers in the encoder and decoder stacks.
        num_heads=8,  # Number of heads to use in multi-headed attention.
        filter_size=1024,  # Inner layer dimension in the feedforward network.

        # Dropout values (only used when training)
        layer_postprocess_dropout=0.1,
        attention_dropout=0.1,
        relu_dropout=0.1,

        # Training params
        label_smoothing=0.1,
        learning_rate=2.0,
        learning_rate_decay_rate=1.0,
        learning_rate_warmup_steps=16000,

        # Optimizer params
        optimizer_adam_beta1=0.9,
        optimizer_adam_beta2=0.997,
        optimizer_adam_epsilon=1e-09,

        # Default prediction params
        extra_decode_length=50,
        beam_size=4,
        alpha=0.6,  # Used to calculate length normalization in beam search.
        allow_ffn_pad=True,
    )
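# Usage sketch (hypothetical caller, not part of the original file): because the params
# live in a defaultdict with a `lambda: None` factory, unknown keys return None instead
# of raising KeyError, which lets optional settings be probed safely.
if __name__ == '__main__':
    params = transformer_params()
    print(params['hidden_size'])       # 512
    print(params['num_heads'])         # 8
    print(params['not_a_real_param'])  # None, via the defaultdict fallback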
from common_train import Trainer
from lm_loss import LogLoss
from lstm_dataset import S2SDataSet
from lstm_graph import BiLSTMEncodeGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Build the vocabulary lookups and the train/dev/test splits of the BobSue seq2seq data.
vocab_dict, idx_dict = get_dict()
train_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv")
dev_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.dev.tsv")
test_ds = S2SDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.test.tsv")

dict_size = len(vocab_dict)
hidden_dim = 200
batch_size = 50

# Train a bidirectional-LSTM encoder seq2seq graph with log loss and Adam.
trainer = Trainer()
graph = BiLSTMEncodeGraph(LogLoss(), Adam(eta=0.001, decay=0.99), dict_size, hidden_dim)
trainer.train(idx_dict, 100, 's2s_bilstm', graph, train_ds, dev_ds, test_ds, 50)
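# Sanity-check sketch (assumption, mirroring the dataset unit test: each batch's .data
# holds a (source, target) pair sharing the same batch dimension). Iterating a few dev
# batches is a cheap way to confirm the data pipeline independently of training.
for batch in dev_ds.batches(batch_size):
    source, target = batch.data
    assert source.shape[0] == target.shape[0]
    break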