def test_ConditionalLSTM_dot():
    """Train a single-layer bidirectional-LSTM encoder with a ConditionalLSTM
    decoder using 'dot' attention, then exercise the sample_ensemble and
    score_corpus utilities on the trained checkpoint."""
    params = load_tests_params()

    # Current test params: Single layered LSTM - ConditionalGRU
    params['BIDIRECTIONAL_ENCODER'] = True
    params['N_LAYERS_ENCODER'] = 1
    params['BIDIRECTIONAL_DEEP_ENCODER'] = True
    params['ENCODER_RNN_TYPE'] = 'LSTM'
    params['DECODER_RNN_TYPE'] = 'ConditionalLSTM'
    params['N_LAYERS_DECODER'] = 1
    params['ATTENTION_MODE'] = 'dot'
    params['REBUILD_DATASET'] = True

    dataset = build_dataset(params)
    # Vocabulary sizes are only known once the dataset has been built.
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Encode the relevant hyperparameters into the model name.
    params['MODEL_NAME'] = ''.join([
        params['TASK_NAME'], '_', params['SRC_LAN'], params['TRG_LAN'], '_', params['MODEL_TYPE'],
        '_src_emb_', str(params['SOURCE_TEXT_EMBEDDING_SIZE']),
        '_bidir_', str(params['BIDIRECTIONAL_ENCODER']),
        '_enc_', params['ENCODER_RNN_TYPE'], '_*', str(params['N_LAYERS_ENCODER']),
        '_', str(params['ENCODER_HIDDEN_SIZE']),
        '_dec_', params['DECODER_RNN_TYPE'], '_*', str(params['N_LAYERS_DECODER']),
        '_', str(params['DECODER_HIDDEN_SIZE']), params['ATTENTION_MODE'],
        '_deepout_', '_'.join(layer[0] for layer in params['DEEP_OUTPUT_LAYERS']),
        '_trg_emb_', str(params['TARGET_TEXT_EMBEDDING_SIZE']),
        '_', params['OPTIMIZER'], '_', str(params['LR']),
    ])
    params['STORE_PATH'] = os.path.join(K.backend() + '_test_train_models', params['MODEL_NAME'])

    # Test several NMT-Keras utilities: train, sample, sample_ensemble, score_corpus...
    print("Training model")
    train_model(params)
    params['RELOAD'] = 1
    print("Done")

    # Namespace-style argument container for the sampling/scoring utilities.
    parser = argparse.ArgumentParser('Parser for unit testing')
    val_source = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['SRC_LAN'])
    val_target = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['TRG_LAN'])
    parser.dataset = os.path.join(
        params['DATASET_STORE_PATH'],
        'Dataset_' + params['DATASET_NAME'] + '_' + params['SRC_LAN'] + params['TRG_LAN'] + '.pkl')
    parser.text = val_source
    parser.splits = ['val']
    parser.config = params['STORE_PATH'] + '/config.pkl'
    parser.models = [params['STORE_PATH'] + '/epoch_' + str(1)]
    parser.verbose = 0
    parser.dest = None
    parser.source = val_source
    parser.target = val_target
    parser.weights = []
    parser.glossary = None

    for n_best in (True, False):
        parser.n_best = n_best
        print("Sampling with n_best = %s " % str(n_best))
        sample_ensemble(parser, params)
        print("Done")

    print("Scoring corpus")
    score_corpus(parser, params)
    print("Done")
def test_transformer():
    """Train a small 2-block Transformer and exercise the sample_ensemble and
    score_corpus utilities on the trained checkpoint."""
    params = load_tests_params()

    # Current test params: Transformer
    params['MODEL_TYPE'] = 'Transformer'
    params['TIED_EMBEDDINGS'] = True
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['MULTIHEAD_ATTENTION_ACTIVATION'] = 'relu'
    params['MODEL_SIZE'] = 8
    params['FF_SIZE'] = params['MODEL_SIZE'] * 4
    params['N_HEADS'] = 2
    params['REBUILD_DATASET'] = True
    params['OPTIMIZED_SEARCH'] = False
    params['POS_UNK'] = False

    dataset = build_dataset(params)
    # Vocabulary sizes are only known once the dataset has been built.
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    params['MODEL_NAME'] = \
        params['TASK_NAME'] + '_' + params['SRC_LAN'] + params['TRG_LAN'] + '_' + params['MODEL_TYPE'] + \
        '_model_size_' + str(params['MODEL_SIZE']) + \
        '_ff_size_' + str(params['FF_SIZE']) + \
        '_num_heads_' + str(params['N_HEADS']) + \
        '_encoder_blocks_' + str(params['N_LAYERS_ENCODER']) + \
        '_decoder_blocks_' + str(params['N_LAYERS_DECODER']) + \
        '_deepout_' + '_'.join([layer[0] for layer in params['DEEP_OUTPUT_LAYERS']]) + \
        '_' + params['OPTIMIZER'] + '_' + str(params['LR'])
    # Consistency fix: build paths with os.path.join, as the other tests in
    # this file do, instead of '/'-string concatenation.
    params['STORE_PATH'] = os.path.join(K.backend() + '_test_train_models', params['MODEL_NAME'])

    # Test several NMT-Keras utilities: train, sample, sample_ensemble, score_corpus...
    print("Training model")
    train_model(params)
    params['RELOAD'] = 1
    print("Done")

    # Namespace-style argument container for the sampling/scoring utilities.
    parser = argparse.ArgumentParser('Parser for unit testing')
    parser.dataset = os.path.join(
        params['DATASET_STORE_PATH'],
        'Dataset_' + params['DATASET_NAME'] + '_' + params['SRC_LAN'] + params['TRG_LAN'] + '.pkl')
    parser.text = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['SRC_LAN'])
    parser.splits = ['val']
    parser.config = os.path.join(params['STORE_PATH'], 'config.pkl')
    parser.models = [os.path.join(params['STORE_PATH'], 'epoch_' + str(1))]
    parser.verbose = 0
    parser.dest = None
    parser.source = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['SRC_LAN'])
    parser.target = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['TRG_LAN'])
    parser.weights = []
    parser.glossary = None

    for n_best in [True, False]:
        parser.n_best = n_best
        print("Sampling with n_best = %s " % str(n_best))
        sample_ensemble(parser, params)
        print("Done")

    print("Scoring corpus")
    score_corpus(parser, params)
    print("Done")
def test_unk_replace_1():
    """Train with POS_UNK enabled and unknown-word-replacement heuristic 1
    (aligning from raw text), then exercise the sample_ensemble and
    score_corpus utilities on the trained checkpoint."""
    params = load_tests_params()

    params['REBUILD_DATASET'] = True
    params['INPUT_VOCABULARY_SIZE'] = 0
    params['OUTPUT_VOCABULARY_SIZE'] = 50
    params['POS_UNK'] = True
    params['HEURISTIC'] = 1
    params['ALIGN_FROM_RAW'] = True

    dataset = build_dataset(params)
    # params['MAPPING'] = DATA_ROOT_PATH + '/mapping.%s_%s.pkl' % (SRC_LAN, TRG_LAN)
    # Vocabulary sizes are only known once the dataset has been built.
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Encode the relevant hyperparameters into the model name.
    params['MODEL_NAME'] = ''.join([
        params['TASK_NAME'], '_', params['SRC_LAN'], params['TRG_LAN'], '_', params['MODEL_TYPE'],
        '_src_emb_', str(params['SOURCE_TEXT_EMBEDDING_SIZE']),
        '_bidir_', str(params['BIDIRECTIONAL_ENCODER']),
        '_enc_', params['ENCODER_RNN_TYPE'], '_*', str(params['N_LAYERS_ENCODER']),
        '_', str(params['ENCODER_HIDDEN_SIZE']),
        '_dec_', params['DECODER_RNN_TYPE'], '_*', str(params['N_LAYERS_DECODER']),
        '_', str(params['DECODER_HIDDEN_SIZE']),
        '_deepout_', '_'.join(layer[0] for layer in params['DEEP_OUTPUT_LAYERS']),
        '_trg_emb_', str(params['TARGET_TEXT_EMBEDDING_SIZE']),
        '_', params['OPTIMIZER'], '_', str(params['LR']),
    ])
    params['STORE_PATH'] = os.path.join(K.backend() + '_test_train_models', params['MODEL_NAME'])

    # Test several NMT-Keras utilities: train, sample, sample_ensemble, score_corpus...
    print("Training model")
    train_model(params)
    params['RELOAD'] = 1
    print("Done")

    # Namespace-style argument container for the sampling/scoring utilities.
    parser = argparse.ArgumentParser('Parser for unit testing')
    val_src = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['SRC_LAN'])
    val_trg = os.path.join(params['DATA_ROOT_PATH'], params['TEXT_FILES']['val'] + params['TRG_LAN'])
    parser.dataset = os.path.join(
        params['DATASET_STORE_PATH'],
        'Dataset_' + params['DATASET_NAME'] + '_' + params['SRC_LAN'] + params['TRG_LAN'] + '.pkl')
    parser.text = val_src
    parser.splits = ['val']
    parser.config = os.path.join(params['STORE_PATH'], 'config.pkl')
    parser.models = [os.path.join(params['STORE_PATH'], 'epoch_' + str(1))]
    parser.verbose = 0
    parser.dest = None
    parser.source = val_src
    parser.target = val_trg
    parser.weights = []
    parser.glossary = None

    for n_best in (True, False):
        parser.n_best = n_best
        print("Sampling with n_best = %s " % str(n_best))
        sample_ensemble(parser, params)
        print("Done")

    print("Scoring corpus")
    score_corpus(parser, params)
    print("Done")
def resume_training(latest_epoch, use_gpu):
    """Resume training of the attentional RNN encoder-decoder from a saved
    epoch checkpoint.

    :param latest_epoch: epoch number of the checkpoint to reload; training
        continues for another 1000 epochs from that point.
    :param use_gpu: whether to use the cuDNN RNN implementations.
    """
    params = load_parameters()
    params['MODEL_TYPE'] = 'AttentionRNNEncoderDecoder'
    params['USE_CUDNN'] = use_gpu
    params['N_GPUS'] = 2
    params['MAX_EPOCH'] = latest_epoch + 1000
    params['BATCH_SIZE'] = 128
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = PATH + "model/"
    params['ATTENTION_MODE'] = "add"
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 512
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 512
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 512
    params['ATTENTION_SIZE'] = 512
    params['ENCODER_HIDDEN_SIZE'] = 512
    params['DECODER_HIDDEN_SIZE'] = 512
    params['ENCODER_RNN_TYPE'] = "LSTM"
    params['DECODER_RNN_TYPE'] = "ConditionalLSTM"
    params['METRICS'] = ['coco']
    params['KERAS_METRICS'] = ['perplexity']
    params['APPLY_DETOKENIZATION'] = True
    params['LENGTH_PENALTY'] = True
    params['LENGTH_NORM_FACTOR'] = 1.0
    params['RELOAD'] = latest_epoch
    params['BEAM_SIZE'] = 1
    params['BEAM_SEARCH'] = True
    params['PLOT_EVALUATION'] = True
    params['MAX_PLOT_Y'] = 1.
    params['MODE'] = 'training'
    params['TENSORBOARD'] = True

    # Bug fix: the banner string contains no '{}' placeholder and `mode` was
    # an undefined name here, so `.format(mode)` raised a NameError.
    result = pyfiglet.figlet_format("RESUME TRAINING", font="digital")
    print(result)
    train_model(params,
                load_dataset=os.getcwd() + "/dataset/Dataset_tutorial_dataset.pkl")
def test_sampling_maxlikelihood():
    """Train while periodically printing max-likelihood samples on the train
    and validation splits."""
    params = load_tests_params()

    params['REBUILD_DATASET'] = True
    params['INPUT_VOCABULARY_SIZE'] = 550
    params['OUTPUT_VOCABULARY_SIZE'] = 550
    params['POS_UNK'] = True
    params['HEURISTIC'] = 0
    params['ALIGN_FROM_RAW'] = True

    # Sampling params: Show some samples during training.
    params['SAMPLE_ON_SETS'] = ['train', 'val']
    params['N_SAMPLES'] = 10
    params['START_SAMPLING_ON_EPOCH'] = 0
    params['SAMPLE_EACH_UPDATES'] = 50
    params['SAMPLING'] = 'max_likelihood'

    dataset = build_dataset(params)
    # Vocabulary sizes are only known once the dataset has been built.
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Encode the relevant hyperparameters into the model name.
    params['MODEL_NAME'] = ''.join([
        params['TASK_NAME'], '_', params['SRC_LAN'], params['TRG_LAN'], '_', params['MODEL_TYPE'],
        '_src_emb_', str(params['SOURCE_TEXT_EMBEDDING_SIZE']),
        '_bidir_', str(params['BIDIRECTIONAL_ENCODER']),
        '_enc_', params['ENCODER_RNN_TYPE'], '_*', str(params['N_LAYERS_ENCODER']),
        '_', str(params['ENCODER_HIDDEN_SIZE']),
        '_dec_', params['DECODER_RNN_TYPE'], '_*', str(params['N_LAYERS_DECODER']),
        '_', str(params['DECODER_HIDDEN_SIZE']),
        '_deepout_', '_'.join(layer[0] for layer in params['DEEP_OUTPUT_LAYERS']),
        '_trg_emb_', str(params['TARGET_TEXT_EMBEDDING_SIZE']),
        '_', params['OPTIMIZER'], '_', str(params['LR']),
    ])
    params['STORE_PATH'] = os.path.join(K.backend() + '_test_train_models', params['MODEL_NAME'])

    # Test several NMT-Keras utilities: train, sample, sample_ensemble, score_corpus...
    print("Training model")
    train_model(params)
    print("Done")
def resume_training(latest_epoch, use_gpu=True):
    """Resume training of the tutorial Transformer model from a saved epoch
    checkpoint.

    :param latest_epoch: epoch number of the checkpoint to reload.
    :param use_gpu: whether to use the cuDNN implementations. Previously this
        was an undefined free name; it is now a parameter with a
        backward-compatible default, matching the other resume_training
        variant in this file.
    """
    params = load_parameters()
    params['RELOAD'] = latest_epoch
    params['MODEL_TYPE'] = 'Transformer'
    params['USE_CUDNN'] = use_gpu
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = MODEL_PATH
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['N_HEADS'] = 100
    params['POS_UNK'] = False  # current Transformer model requires this
    params['ATTEND_ON_OUTPUT'] = True  # current Transformer model requires this
    params['MODEL_SIZE'] = 100
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 100
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 100
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 100
    params['ENCODER_HIDDEN_SIZE'] = 100
    params['DECODER_HIDDEN_SIZE'] = 100
    params['APPLY_DETOKENIZATION'] = True
    params['LENGTH_PENALTY'] = True
    params['LENGTH_NORM_FACTOR'] = 0.8
    params['MAX_INPUT_TEXT_LEN'] = 128
    params['MAX_OUTPUT_TEXT_LEN'] = 128
    params['STOP_METRIC'] = 'perplexity'
    params['BEAM_SIZE'] = 20
    params['N_GPUS'] = 2
    params['START_EVAL_ON_EPOCH'] = 1
    params['BATCH_SIZE'] = 128
    params['EVAL_EACH'] = 1
    params['MAX_EPOCH'] = 100
    # Bug fix: key was misspelled 'PLOT_EVALULATION' (silently ignored);
    # other functions in this file use 'PLOT_EVALUATION'. The duplicated
    # APPLY_DETOKENIZATION assignment was also removed.
    params['PLOT_EVALUATION'] = True
    params['MODE'] = 'training'
    params['BEAM_SEARCH'] = True
    params['TENSORBOARD'] = True

    train_model(params,
                load_dataset=MODEL_PATH + "/dataset/Dataset_tutorial_dataset.pkl")
def resume_training(latest_epoch, use_gpu=True):
    """Resume training of the tutorial attentional RNN encoder-decoder from a
    saved epoch checkpoint.

    :param latest_epoch: epoch number of the checkpoint to reload.
    :param use_gpu: whether to use the cuDNN implementations. Previously this
        was an undefined free name; it is now a parameter with a
        backward-compatible default, matching the other resume_training
        variant in this file.
    """
    params = load_parameters()
    params['RELOAD'] = latest_epoch
    params['MODEL_TYPE'] = 'AttentionRNNEncoderDecoder'
    params['USE_CUDNN'] = use_gpu
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = MODEL_PATH
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 32
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 32
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 32
    params['ATTENTION_SIZE'] = 32
    params['ENCODER_HIDDEN_SIZE'] = 32
    params['DECODER_HIDDEN_SIZE'] = 32
    params['N_LAYERS_ENCODER'] = 4
    params['N_LAYERS_DECODER'] = 4
    params['APPLY_DETOKENIZATION'] = True
    params['MAX_INPUT_TEXT_LEN'] = 24
    params['MAX_OUTPUT_TEXT_LEN'] = 24
    params['STOP_METRIC'] = 'perplexity'
    params['POS_UNK'] = True
    params['BEAM_SIZE'] = 20
    params['N_GPUS'] = 2
    params['START_EVAL_ON_EPOCH'] = 1
    params['BATCH_SIZE'] = 256
    params['EVAL_EACH'] = 1
    params['MAX_EPOCH'] = 300
    # Bug fix: key was misspelled 'PLOT_EVALULATION' (silently ignored);
    # other functions in this file use 'PLOT_EVALUATION'. The duplicated
    # APPLY_DETOKENIZATION assignment was also removed.
    params['PLOT_EVALUATION'] = True
    params['MODE'] = 'training'
    params['BEAM_SEARCH'] = True
    params['TENSORBOARD'] = True
    params['LR'] = 0.1

    train_model(params,
                load_dataset=MODEL_PATH + "/dataset/Dataset_tutorial_dataset.pkl")
# Entry point: load default hyperparameters, merge in an optional stored
# config, apply command-line key=Value overrides, then dispatch on MODE.
parameters = load_parameters()
if args.config is not None:
    # A stored config.pkl overrides the compiled-in defaults.
    parameters = update_parameters(parameters, pkl2dict(args.config))
try:
    # Apply command-line overrides of the form key=Value.
    for arg in args.changes:
        try:
            k, v = arg.split('=')
        except ValueError:
            print(
                'Overwritten arguments must have the form key=Value. \n Currently are: %s' % str(args.changes))
            exit(1)
        try:
            # Interpret the value as a Python literal when possible (ints,
            # bools, lists, ...); otherwise keep it as a plain string.
            parameters[k] = ast.literal_eval(v)
        except ValueError:
            parameters[k] = v
except ValueError:
    # NOTE(review): relies on k/v still being bound from the loop above.
    print('Error processing arguments: (', k, ",", v, ")")
    exit(2)

parameters = check_params(parameters)
if parameters['MODE'] == 'training':
    logger.info('Running training.')
    train_model(parameters, args.dataset)
elif parameters['MODE'] == 'sampling':
    # Sampling mode is no longer supported from this entry point.
    logger.error(
        'Depecrated function. For sampling from a trained model, please run sample_ensemble.py.'
    )
    exit(2)
logger.info('Done!')
def start_training(use_gpu):
    """Build the tutorial translation dataset and train a small Transformer
    NMT model on it.

    :param use_gpu: whether to use the cuDNN/GPU code paths.
    """
    ds = Dataset('tutorial_dataset', 'tutorial', silence=False)

    # Target text (outputs) for the train/val splits.
    ds.setOutput(DATA_PATH + "train_y.txt", 'train',
                 type='text', id='target_text',
                 tokenization='tokenize_basic', build_vocabulary=True,
                 pad_on_batch=True, sample_weights=True,
                 max_text_len=30, max_words=30000, min_occ=0)
    ds.setOutput(DATA_PATH + "val_y.txt", 'val',
                 type='text', id='target_text',
                 pad_on_batch=True, tokenization='tokenize_basic',
                 sample_weights=True, max_text_len=30, max_words=0)

    # Source text (inputs) for the train/val splits.
    ds.setInput(DATA_PATH + "train_x.txt", 'train',
                type='text', id='source_text',
                pad_on_batch=True, tokenization='tokenize_basic',
                build_vocabulary=True, fill='end',
                max_text_len=30, max_words=30000, min_occ=0)
    ds.setInput(DATA_PATH + "val_x.txt", 'val',
                type='text', id='source_text',
                pad_on_batch=True, tokenization='tokenize_basic',
                fill='end', max_text_len=30, min_occ=0)

    # 'state_below': the target shifted one position (offset=1) for teacher
    # forcing. This input does not exist at sampling time, so a 'ghost'
    # input is inserted for the val split.
    ds.setInput(DATA_PATH + "train_y.txt", 'train',
                type='text', id='state_below', required=False,
                tokenization='tokenize_basic', pad_on_batch=True,
                build_vocabulary='target_text', offset=1, fill='end',
                max_text_len=30, max_words=30000)
    ds.setInput(None, 'val', type='ghost', id='state_below', required=False)

    # Keep the raw source words (e.g. for unknown-word replacement).
    for split, input_text_filename in zip(
            ['train', 'val'],
            [DATA_PATH + "train_x.txt", DATA_PATH + "val_x.txt"]):
        ds.setRawInput(input_text_filename, split,
                       type='file-name', id='raw_source_text',
                       overwrite_split=True)

    # Match the references with the inputs. Since we only have one reference
    # per input sample, we set repeat=1.
    keep_n_captions(ds, repeat=1, n=1, set_names=['val'])

    # Save the dataset instance for use in other experiments.
    saveDataset(ds, MODEL_PATH + "/dataset")

    # Create and train the NMT model. Defaults come from config.py; the
    # relevant hyperparameters are overridden below.
    params = load_parameters()
    dataset = loadDataset(MODEL_PATH + "/dataset/Dataset_tutorial_dataset.pkl")

    # Bug fix: the dataset was loaded but never used. The vocabulary sizes
    # are only known after building the dataset, so set them here (as the
    # sibling start_training in this file does).
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['source_text']
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']

    params['MODEL_TYPE'] = 'Transformer'
    params['USE_CUDNN'] = use_gpu
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = MODEL_PATH
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['N_HEADS'] = 100
    params['POS_UNK'] = False  # current Transformer model requires this
    params['ATTEND_ON_OUTPUT'] = True  # current Transformer model requires this
    params['MODEL_SIZE'] = 100
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 100
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 100
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 100
    params['ENCODER_HIDDEN_SIZE'] = 100
    params['DECODER_HIDDEN_SIZE'] = 100
    params['APPLY_DETOKENIZATION'] = True
    params['LENGTH_PENALTY'] = True
    params['LENGTH_NORM_FACTOR'] = 0.8
    params['MAX_INPUT_TEXT_LEN'] = 128
    params['MAX_OUTPUT_TEXT_LEN'] = 128
    params['STOP_METRIC'] = 'perplexity'
    params['BEAM_SIZE'] = 20
    params['N_GPUS'] = 2
    params['START_EVAL_ON_EPOCH'] = 1
    params['BATCH_SIZE'] = 128
    params['EVAL_EACH'] = 1
    params['MAX_EPOCH'] = 100
    # Bug fix: key was misspelled 'PLOT_EVALULATION' (silently ignored);
    # other functions in this file use 'PLOT_EVALUATION'. The duplicated
    # APPLY_DETOKENIZATION assignment was also removed.
    params['PLOT_EVALUATION'] = True
    params['MODE'] = 'training'
    params['BEAM_SEARCH'] = True
    params['TENSORBOARD'] = True

    train_model(params,
                load_dataset=MODEL_PATH + "/dataset/Dataset_tutorial_dataset.pkl")
def start_training(use_gpu):
    """Build the error-correction tutorial dataset and train an
    AttentionRNNEncoderDecoder (LSTM encoder, ConditionalLSTM decoder) from
    scratch.

    :param use_gpu: whether to use the cuDNN RNN implementations.
    """
    ds = Dataset('tutorial_dataset', 'tutorial', silence=False)

    # Target (correct) sentences for the train/val splits.
    ds.setOutput(PATH + "train_correct.txt", 'train',
                 type='text', id='target_text',
                 tokenization='tokenize_basic', build_vocabulary=True,
                 pad_on_batch=True, sample_weights=True,
                 max_text_len=100, max_words=55000, min_occ=1)
    ds.setOutput(PATH + "validation_correct.txt", 'val',
                 type='text', id='target_text',
                 pad_on_batch=True, tokenization='tokenize_basic',
                 sample_weights=True, max_text_len=100, max_words=0)

    # Source (erroneous) sentences for the train/val splits.
    ds.setInput(PATH + "train_error.txt", 'train',
                type='text', id='source_text',
                pad_on_batch=True, tokenization='tokenize_basic',
                build_vocabulary=True, fill='end',
                max_text_len=100, max_words=55000, min_occ=1)
    ds.setInput(PATH + "validation_error.txt", 'val',
                type='text', id='source_text',
                pad_on_batch=True, tokenization='tokenize_basic',
                fill='end', max_text_len=100, min_occ=1)

    # 'state_below': the target shifted right one position (offset=1) for
    # teacher forcing. During sampling time we won't have this input, so we
    # insert an artificial 'ghost' input for the validation split.
    ds.setInput(PATH + "train_correct.txt", 'train',
                type='text', id='state_below', required=False,
                tokenization='tokenize_basic', pad_on_batch=True,
                build_vocabulary='target_text', offset=1, fill='end',
                max_text_len=100, max_words=55000)
    ds.setInput(None, 'val', type='ghost', id='state_below', required=False)

    # Keep the literal source words (for replacing unknown words).
    for split, input_text_filename in zip(
            ['train', 'val'],
            [PATH + "train_error.txt", PATH + "validation_error.txt"]):
        ds.setRawInput(input_text_filename, split,
                       type='file-name', id='raw_source_text',
                       overwrite_split=True)

    # Match the references with the inputs. Since we only have one reference
    # per input sample, we set repeat=1.
    keep_n_captions(ds, repeat=1, n=1, set_names=['val'])

    # Save the dataset instance for use in other experiments.
    saveDataset(ds, PATH + "dataset")

    # Create and train the NMT model. Defaults come from config.py; the
    # relevant hyperparameters are overridden below.
    params = load_parameters()
    dataset = loadDataset(PATH + "dataset/Dataset_tutorial_dataset.pkl")

    # Vocabulary sizes are only known once the dataset has been built.
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['source_text']
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']

    params['USE_CUDNN'] = use_gpu
    params['N_GPUS'] = 2
    params['MAX_EPOCH'] = 1000
    params['EARLY_STOP'] = True
    params['PATIENCE'] = 10
    params['SAVE_EACH_EVALUATION'] = True
    params['STORE_PATH'] = PATH + "model/"
    params['BATCH_SIZE'] = 128
    params['ATTENTION_MODE'] = "add"
    params['N_LAYERS_ENCODER'] = 2
    params['N_LAYERS_DECODER'] = 2
    params['SOURCE_TEXT_EMBEDDING_SIZE'] = 512
    params['TARGET_TEXT_EMBEDDING_SIZE'] = 512
    params['SKIP_VECTORS_HIDDEN_SIZE'] = 512
    params['ATTENTION_SIZE'] = 512
    params['ENCODER_HIDDEN_SIZE'] = 512
    params['DECODER_HIDDEN_SIZE'] = 512
    params['ENCODER_RNN_TYPE'] = "LSTM"
    params['DECODER_RNN_TYPE'] = "ConditionalLSTM"
    params['METRICS'] = ['coco']
    params['KERAS_METRICS'] = ['perplexity']
    params['APPLY_DETOKENIZATION'] = True
    params['LENGTH_PENALTY'] = True
    params['LENGTH_NORM_FACTOR'] = 1.0
    params['BEAM_SIZE'] = 1
    params['BEAM_SEARCH'] = True
    params['PLOT_EVALUATION'] = True
    params['MAX_PLOT_Y'] = 1.
    params['MODE'] = 'training'
    params['TENSORBOARD'] = True

    # Bug fix: the banner string contains no '{}' placeholder and `mode` was
    # an undefined name here, so `.format(mode)` raised a NameError.
    result = pyfiglet.figlet_format("START TRAINING FROM SCRATCH", font="digital")
    print(result)

    # NOTE(review): the dataset was saved under PATH + "dataset" but is
    # reloaded here relative to os.getcwd() — confirm PATH == os.getcwd() + '/'.
    train_model(params,
                load_dataset=os.getcwd() + "/dataset/Dataset_tutorial_dataset.pkl")