# Dropout setting. NOTE(review): the consumer of this value is not visible in
# this chunk — presumably passed to the model constructors; confirm.
dropout = 0.1

# Configure training/optimization
# NOTE(review): none of the values below are read in the visible code. The
# per-setting notes are inferred from the names only — confirm against the
# training loop before relying on them.
# clip: presumably the gradient-clipping threshold.
clip = 50.0
# teacher_forcing_ratio: presumably the probability of feeding the gold target
# token to the decoder; 1 would then mean "always" — TODO confirm.
teacher_forcing_ratio = 1
learning_rate = 0.0001
# decoder_learning_ratio: presumably a multiplier applied to learning_rate for
# the decoder's optimizer — TODO confirm.
decoder_learning_ratio = 5.0
n_epochs = 5
# epoch: presumably the starting value of the epoch counter.
epoch = 0

# Load the `.vi`/`.en` tokenized splits (presumably Vietnamese -> English).
# prepareData is called with a (source file, English file) pair and its result
# is unpacked into three values. Only the training call keeps the first two
# (lang_vi, lang_en_vi); the dev and test calls discard them and keep just the
# third value (the pairs).
lang_vi, lang_en_vi, train_pairs_vi = prepareData('train.tok.vi',
                                                  'train.tok.en')
_, _, val_pairs_vi = prepareData('dev.tok.vi', 'dev.tok.en')
_, _, test_pairs_vi = prepareData('test.tok.vi', 'test.tok.en')

# Convert each split with indexesFromPairs, always passing the two language
# objects obtained from the training files.
# NOTE(review): dev/test tokens that never occur in the training files are
# therefore looked up in the training-derived objects — verify how
# indexesFromPairs handles unknown tokens.
train_id_vi = indexesFromPairs(lang_vi, lang_en_vi, train_pairs_vi)
val_id_vi = indexesFromPairs(lang_vi, lang_en_vi, val_pairs_vi)
test_id_vi = indexesFromPairs(lang_vi, lang_en_vi, test_pairs_vi)


def collate_fn_vi(batch):
    """
    return (pair_batch, len_batch)
    """
    pairs = [sample[0] for sample in batch]
    input_lengths = [sample[1][0] for sample in batch]
    target_lengths = [sample[1][1] for sample in batch]
    max_input_length = max(input_lengths)
    max_target_length = max(target_lengths)

    pairstensor = tensorsFromPairsSorted(lang_vi, lang_en_vi, max_input_length,
Example #2
0
# Dropout setting. NOTE(review): the consumer of this value is not visible in
# this chunk — presumably passed to the model constructors; confirm.
dropout = 0.1

# Configure training/optimization
# NOTE(review): none of the values below are read in the visible code. The
# per-setting notes are inferred from the names only — confirm against the
# training loop before relying on them.
# clip: presumably the gradient-clipping threshold.
clip = 50.0
# teacher_forcing_ratio: presumably the probability of feeding the gold target
# token to the decoder; 1 would then mean "always" — TODO confirm.
teacher_forcing_ratio = 1
learning_rate = 0.0001
# decoder_learning_ratio: presumably a multiplier applied to learning_rate for
# the decoder's optimizer — TODO confirm.
decoder_learning_ratio = 5.0
n_epochs = 5
# epoch: presumably the starting value of the epoch counter.
epoch = 0

# Load the `.zh`/`.en` tokenized splits (presumably Chinese -> English).
# prepareData is called with a (source file, English file) pair and its result
# is unpacked into three values. Only the training call keeps the first two
# (lang_zh, lang_en_zh); the dev and test calls discard them and keep just the
# third value (the pairs).
lang_zh, lang_en_zh, train_pairs_zh = prepareData('train.tok.zh',
                                                  'train.tok.en')
_, _, val_pairs_zh = prepareData('dev.tok.zh', 'dev.tok.en')
_, _, test_pairs_zh = prepareData('test.tok.zh', 'test.tok.en')

# Convert each split with indexesFromPairs, always passing the two language
# objects obtained from the training files.
# NOTE(review): dev/test tokens that never occur in the training files are
# therefore looked up in the training-derived objects — verify how
# indexesFromPairs handles unknown tokens.
train_id_zh = indexesFromPairs(lang_zh, lang_en_zh, train_pairs_zh)
val_id_zh = indexesFromPairs(lang_zh, lang_en_zh, val_pairs_zh)
test_id_zh = indexesFromPairs(lang_zh, lang_en_zh, test_pairs_zh)


def collate_fn_zh(batch):
    """
    return (pair_batch, len_batch)
    """
    pairs = [sample[0] for sample in batch]
    input_lengths = [sample[1][0] for sample in batch]
    target_lengths = [sample[1][1] for sample in batch]
    max_input_length = max(input_lengths)
    max_target_length = max(target_lengths)

    pairstensor = tensorsFromPairsSorted(lang_zh, lang_en_zh, max_input_length,