# Train an English->German sequence-to-sequence model with a Transformer.
import os
import sys

import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

# --- Vocabulary and tensorized data -----------------------------------------
# Token dictionaries are built (or loaded) from the parallel corpus; the
# tensorized pairs are cached in HDF5 files so later runs skip re-parsing.
itokens, otokens = dd.MakeS2SDict('en2de.s2s.txt', dict_file='en2de_word.txt')
Xtrain, Ytrain = dd.MakeS2SData('en2de.s2s.txt', itokens, otokens,
                                h5_file='en2de.h5')
Xvalid, Yvalid = dd.MakeS2SData('en2de.s2s.valid.txt', itokens, otokens,
                                h5_file='en2de.valid.h5')

# Quick sanity report on vocabulary sizes and dataset shapes.
print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())
print('train shapes:', Xtrain.shape, Ytrain.shape)
print('valid shapes:', Xvalid.shape, Yvalid.shape)

# Disabled RNN baseline, kept for reference (string literal used as a
# block comment in the original).
'''
from rnn_s2s import RNNSeq2Seq
s2s = RNNSeq2Seq(itokens,otokens, 256)
s2s.compile('rmsprop')
s2s.model.fit([Xtrain, Ytrain], None, batch_size=64, epochs=30, validation_data=([Xvalid, Yvalid], None))
'''

# --- Model -------------------------------------------------------------------
from transformer import Transformer, LRSchedulerPerStep, LRSchedulerPerEpoch

d_model = 256
s2s = Transformer(
    itokens, otokens,
    len_limit=70,
    d_model=d_model,
    d_inner_hid=512,
    n_head=4,
    d_k=64,
    d_v=64,
    layers=2,
    dropout=0.1,
)
# Variant of the training script pointed at the Arabic dataset ('data/ara.txt').
import os, sys
import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

# Build source/target token dictionaries from the corpus (cached word file).
itokens, otokens = dd.MakeS2SDict('data/ara.txt', dict_file='data/ara_word.txt')
# NOTE(review): the training and validation tensors are built from the SAME
# file ('data/ara.txt'), only cached under different h5 names — validation is
# not held-out data. Confirm whether a separate validation corpus was intended.
Xtrain, Ytrain = dd.MakeS2SData('data/ara.txt', itokens, otokens, h5_file='data/ara.h5')
Xvalid, Yvalid = dd.MakeS2SData('data/ara.txt', itokens, otokens, h5_file='data/ara.valid.h5')

# Sanity report: vocabulary sizes and dataset shapes.
print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())
print('train shapes:', Xtrain.shape, Ytrain.shape)
print('valid shapes:', Xvalid.shape, Yvalid.shape)

# Disabled RNN baseline (triple-quoted string literal used as a block comment).
'''
from rnn_s2s import RNNSeq2Seq
s2s = RNNSeq2Seq(itokens,otokens, 256)
s2s.compile('rmsprop')
s2s.model.fit([Xtrain, Ytrain], None, batch_size=64, epochs=30, validation_data=([Xvalid, Yvalid], None))
'''

from transformer import Transformer, LRSchedulerPerStep, LRSchedulerPerEpoch
d_model = 256
# NOTE(review): this statement is truncated — the trailing backslash continues
# onto the next line of the file, which appears to belong to a different,
# conflicting version of this script (merge damage). Repair upstream.
s2s = Transformer(itokens, otokens, len_limit=70, d_model=d_model, d_inner_hid=512, \
'd_k': d_k, 'd_v': d_v, 'len_limit': len_limit, 'dropout': dropout, 'batch_size': batch_size, 'max_len': max_len } filepath = createHistoryFile(model_parameters, sys.argv) ############### Load trainingsdata ################ if 'testdata' in sys.argv: itokens, otokens = dd.MakeS2SDict( 'data/test_subset/en2de.s2s.txt', dict_file='data/test_subset/en2de_word.txt') Xtrain, Ytrain = dd.MakeS2SData('data/test_subset/en2de.s2s.txt', itokens, otokens, h5_file='data/test_subset/en2de.h5') Xvalid, Yvalid = dd.MakeS2SData('data/test_subset/en2de.s2s.valid.txt', itokens, otokens, h5_file='data/test_subset/en2de.valid.h5') if 'origdata' in sys.argv: itokens, otokens = dd.MakeS2SDict('data/en2de.s2s.txt', dict_file='data/en2de_word.txt') Xtrain, Ytrain = dd.MakeS2SData('data/en2de.s2s.txt', itokens, otokens, h5_file='data/en2de.h5') Xvalid, Yvalid = dd.MakeS2SData('data/en2de.s2s.valid.txt', itokens,