# Settings for the run on the '.vi' / '.en' token files.
# NOTE(review): the '.zh' cell that follows assigns these same module-level
# names again, so whichever cell executes last determines their values.
batch_size = 2
MAX_LENGTH = 70

# Model settings
attn_model = 'concat'
hidden_size = 500
n_layers = 2
dropout = 0.1

# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_epochs = 5
epoch = 0

# Only the train split keeps the first two values returned by prepareData;
# the dev and test calls discard them and keep just the third (the pairs).
lang_vi, lang_en_vi, train_pairs_vi = prepareData(
    'train.tok.vi', 'train.tok.en'
)
_, _, val_pairs_vi = prepareData('dev.tok.vi', 'dev.tok.en')
_, _, test_pairs_vi = prepareData('test.tok.vi', 'test.tok.en')

# All three splits are passed through indexesFromPairs together with the
# two objects obtained from the train split.
train_id_vi = indexesFromPairs(lang_vi, lang_en_vi, train_pairs_vi)
val_id_vi = indexesFromPairs(lang_vi, lang_en_vi, val_pairs_vi)
test_id_vi = indexesFromPairs(lang_vi, lang_en_vi, test_pairs_vi)


def collate_fn_vi(batch):
    """Pull the pairs and the per-sample lengths out of ``batch``.

    Every element of ``batch`` is indexed as ``item[0]`` (collected into
    ``pairs``) and ``item[1]``, whose entries 0 and 1 are collected into
    ``input_lengths`` and ``target_lengths`` respectively.

    The original note on this function reads
    "return (pair_batch, len_batch)".

    NOTE(review): no return statement is visible in this view, so as shown
    the function returns None -- confirm the rest of the body was not lost.
    """
    pairs = [item[0] for item in batch]
    input_lengths = [item[1][0] for item in batch]
    target_lengths = [item[1][1] for item in batch]
# Settings for the run on the '.zh' / '.en' token files.
# NOTE(review): the preceding '.vi' cell assigns these same module-level
# names; executing this cell overrides those values.
batch_size = 2
MAX_LENGTH = 70

# Model settings
attn_model = 'dot'
hidden_size = 128
n_layers = 2
dropout = 0.1

# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_epochs = 5
epoch = 0

# Only the train split keeps the first two values returned by prepareData;
# the dev and test calls discard them and keep just the third (the pairs).
lang_zh, lang_en_zh, train_pairs_zh = prepareData(
    'train.tok.zh', 'train.tok.en'
)
_, _, val_pairs_zh = prepareData('dev.tok.zh', 'dev.tok.en')
_, _, test_pairs_zh = prepareData('test.tok.zh', 'test.tok.en')

# All three splits are passed through indexesFromPairs together with the
# two objects obtained from the train split.
train_id_zh = indexesFromPairs(lang_zh, lang_en_zh, train_pairs_zh)
val_id_zh = indexesFromPairs(lang_zh, lang_en_zh, val_pairs_zh)
test_id_zh = indexesFromPairs(lang_zh, lang_en_zh, test_pairs_zh)


def collate_fn_zh(batch):
    """Pull the pairs and the per-sample lengths out of ``batch``.

    Every element of ``batch`` is indexed as ``item[0]`` (collected into
    ``pairs``) and ``item[1]``, whose entries 0 and 1 are collected into
    ``input_lengths`` and ``target_lengths`` respectively.

    The original note on this function reads
    "return (pair_batch, len_batch)".

    NOTE(review): no return statement is visible in this view, so as shown
    the function returns None -- confirm the rest of the body was not lost.
    """
    pairs = [item[0] for item in batch]
    input_lengths = [item[1][0] for item in batch]
    target_lengths = [item[1][1] for item in batch]