def test_eval_result_invariability(self, tmpdir, gpu, model_name,
                                   variant_name, variant_options):
    """
    Performs some translations with preexisting models and compares the
    results obtained with different options against previous results of
    the same experiment.  The results should be identical.  If not, it
    means that a recent commit has changed the behavior of the system.
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    train_dir = os.path.join(test_data_dir, "models")
    train_prefix = os.path.join(train_dir, "{0}.train".format(model_name))
    search_eval_dir = tmpdir.mkdir("eval")
    search_file = os.path.join(
        str(search_eval_dir),
        'translations_using_{0}.txt'.format(variant_name))
    args_eval_search = [train_prefix + '.train.config',
                        train_prefix + '.model.best.npz',
                        data_src_file,
                        search_file] + variant_options.split(' ')
    if gpu is not None:
        args_eval_search += ['--gpu', gpu]
    main(arguments=["eval"] + args_eval_search)

    with open(os.path.join(
            str(test_data_dir),
            "models/{0}.translations_using_{1}.txt".format(
                model_name, variant_name))) as f:
        expected_translations = f.readlines()
    with open(search_file) as f:
        actual_translations = f.readlines()

    print("expected_translations")
    for p in expected_translations:
        print(p)
    print("actual_translations")
    for p in actual_translations:
        print(p)
    assert actual_translations == expected_translations

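# The (model_name, variant_name, variant_options) arguments of the test
# above are expected to be injected by the test harness.  As a hedged
# sketch, a pytest parametrization could look like the following; the
# model names and option strings are hypothetical placeholders, not the
# project's actual test models:
#
#   @pytest.mark.parametrize("model_name, variant_name, variant_options", [
#       ("model1", "beam_search", "--mode beam_search --beam_width 30"),
#       ("model1", "greedy_search", "--mode translate"),
#   ])
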
def test_make_data(self, tmpdir, gpu):
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = ('make_data {0} {1} {2} --dev_src {0} --dev_tgt {1} '
            '--src_voc_size 512 --tgt_voc_size 3 --max_nb_ex 3 '
            '--test_src {0} --test_tgt {1} --tgt_segmentation_type char '
            '--src_segmentation_type word').format(
                data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)

def test_data_creation(self, tmpdir, gpu):
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    args = ["make_data",
            os.path.join(test_data_dir, "src2.txt"),
            os.path.join(test_data_dir, "tgt2.txt"),
            data_prefix]
    main(arguments=args)
    args_train = ["train", data_prefix, train_prefix] + \
        "--max_nb_iters 5 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

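# The make_data + train sequence above recurs in most tests of this class.
# A hypothetical helper (a sketch, not part of the original suite) that
# the later tests could call to factor out this boilerplate; the default
# training options mirror the small-model settings used throughout:
def _make_data_and_train(self, tmpdir, gpu, test_data_dir,
                         train_options="--max_nb_iters 5 --mb_size 2 "
                                       "--Ei 10 --Eo 12 --Hi 30 --Ha 70 "
                                       "--Ho 15 --Hl 23"):
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    # Build the indexed data files, then train a small model on them.
    main(arguments='make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' '))
    args_train = ["train", data_prefix, train_prefix] + \
        train_options.split(' ')
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)
    return data_src_file, data_tgt_file, train_prefix
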
def test_compare_beam_search_vs_same_ensemble_search(self, tmpdir, gpu):
    """
    Compare beam_search and an ensemble_beam_search using 3 identical
    models and check whether the translation results are equal.
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train", data_prefix, train_prefix] + \
        "--max_nb_iters 200 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

    beam_search_eval_dir = tmpdir.mkdir("eval_beam_search")
    beam_search_file = os.path.join(str(beam_search_eval_dir),
                                    'translations.txt')
    args_eval = ["eval",
                 train_prefix + '.train.config',
                 train_prefix + '.model.best.npz',
                 data_src_file,
                 beam_search_file] + \
        '--mode beam_search --beam_width 30'.split(' ')
    if gpu is not None:
        args_eval += ['--gpu', gpu]
    main(arguments=args_eval)

    ensemble_search_eval_dir = tmpdir.mkdir("eval_ensemble_search")
    ensemble_search_file = os.path.join(str(ensemble_search_eval_dir),
                                        'translations.txt')
    args_eval = ["eval",
                 train_prefix + '.train.config',
                 train_prefix + '.model.best.npz',
                 data_src_file,
                 ensemble_search_file] + \
        ('--mode beam_search --beam_width 30 '
         '--additional_training_config {0} {0} '
         '--additional_trained_model {1} {1}').format(
            train_prefix + '.train.config',
            train_prefix + '.model.best.npz').split(' ')
    if gpu is not None:
        args_eval += ['--gpu', gpu]
    main(arguments=args_eval)

    with open(beam_search_file) as f:
        beam_search_translations = f.readlines()
    with open(ensemble_search_file) as f:
        ensemble_search_translations = f.readlines()
    print("beam_search_translations")
    for p in beam_search_translations:
        print(p)
    print("ensemble_search_translations")
    for p in ensemble_search_translations:
        print(p)
    assert beam_search_translations == ensemble_search_translations

def test_train_result_invariability(self, tmpdir, gpu, model_name, options):
    """
    Train some models and check that the result matches the expected
    result.  The result should be identical.  If not, it means that a
    recent commit has changed the behavior of the system.
    """
    seed = 1234
    random.seed(seed)
    np.random.seed(seed)
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    work_dir = tmpdir.mkdir("work")
    test_prefix = "{0}/tests/tests_data/models/{1}".format(
        str(work_dir), model_name)
    ref_prefix = "tests/tests_data/models/{0}".format(model_name)
    args_make_data = [data_src_file, data_tgt_file,
                      test_prefix + "_test.data"] + \
        '--dev_src {0} --dev_tgt {1}'.format(
            data_src_file, data_tgt_file).split(' ')
    main(arguments=["make_data"] + args_make_data)
    args_train = [test_prefix + "_test.data",
                  test_prefix + "_test.train"] + options.split(' ')
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=["train"] + args_train)

    with np.load(test_prefix + '_test.train.model.best.npz') as test_model_data:
        with np.load(ref_prefix + '.train.model.best.npz') as ref_model_data:
            assert len(test_model_data.keys()) == len(ref_model_data.keys())
            for test_key, test_value in test_model_data.items():
                np.testing.assert_array_almost_equal(
                    test_value, ref_model_data[test_key], 5)

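# The seeding above covers Python's and NumPy's RNGs only.  For exact
# reproducibility of GPU runs, CuPy's generator would need seeding as
# well.  A hypothetical helper (a sketch assuming the cupy package is
# installed for GPU environments; not part of the original suite):
def _seed_everything(self, seed):
    # Seed every RNG the training code may draw from.
    random.seed(seed)
    np.random.seed(seed)
    try:
        import cupy
        cupy.random.seed(seed)  # only relevant when training on GPU
    except ImportError:
        pass  # CPU-only environment: cupy is not available
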
def test_eval(self, tmpdir, gpu):
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    print("test_data_dir={0}".format(test_data_dir))
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train"] + [data_prefix, train_prefix] + \
        "--max_nb_iters 6 --report_every 2 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

    eval_dir = tmpdir.mkdir("eval")
    translation_file = os.path.join(str(eval_dir), 'translations.txt')
    args_eval = ["eval",
                 train_prefix + '.train.config',
                 train_prefix + '.model.best.npz',
                 data_src_file,
                 translation_file] + \
        '--mode beam_search --beam_width 30'.split(' ') + \
        ["--additional_training_config", train_prefix + '.train.config',
         "--additional_trained_model",
         train_prefix + '.model.best_loss.npz'] + \
        ["--tgt_fn", data_tgt_file, "--ref", data_tgt_file] + \
        "--max_nb_ex 3 --mb_size 1 --beam_pruning_margin 10".split(" ") + \
        "--nb_steps 23 --nb_steps_ratio 2.8 --nb_batch_to_sort 2 --prob_space_combination".split(" ")
    if gpu is not None:
        args_eval += ['--gpu', gpu]
    main(arguments=args_eval)

def test_checkpoint_saving(self, tmpdir, gpu):
    """
    Test that no error occurs during checkpoint saving.
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train", data_prefix, train_prefix] + \
        "--max_nb_iters 10 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23 --save_ckpt_every 5".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

def test_train(self, tmpdir, gpu):
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train"] + [data_prefix, train_prefix] + \
        "--lexicon_prob_epsilon 0.5 --max_nb_iters 3 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ") + \
        "--encoder_cell_type lstm --decoder_cell_type lstm --nb_batch_to_sort 2 --noise_on_prev_word --l2_gradient_clipping 1".split(" ") + \
        "--weight_decay 0.001 --optimizer momentum --learning_rate 0.23 --momentum 0.56 --randomized_data".split(" ") + \
        "--no_shuffle_of_training_data --max_src_tgt_length 56 --report_every 34 --sample_every 45".split(" ") + \
        "--save_ckpt_every 56".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

def test_overfitting(self, tmpdir, gpu):
    """
    Test whether the translation results match the target translations
    when the model is overtrained.
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train", data_prefix, train_prefix] + \
        "--max_nb_iters 1500 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

    eval_dir = tmpdir.mkdir("eval")
    translation_file = os.path.join(str(eval_dir), 'translations.txt')
    args_eval = ["eval",
                 train_prefix + '.train.config',
                 train_prefix + '.model.best.npz',
                 data_src_file,
                 translation_file] + \
        '--mode beam_search --beam_width 30'.split(' ')
    if gpu is not None:
        args_eval += ['--gpu', gpu]
    main(arguments=args_eval)

    with open(data_tgt_file) as f:
        expected_translations = f.readlines()
    with open(translation_file) as f:
        actual_translations = f.readlines()
    print("expected_translations")
    for p in expected_translations:
        print(p)
    print("actual_translations")
    for p in actual_translations:
        print(p)
    assert actual_translations == expected_translations

def test_compare_beam_search_vs_greedy_search(self, tmpdir, gpu):
    """
    Compare a beam search of width 1 with a greedy search and check
    whether the translation results are equal.
    """
    # At this moment, this test fails once in a while.  To increase the
    # chance of finding a case where it fails, the test body is executed
    # several times.
    for i in range(0, 10):
        test_data_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "../tests_data")
        train_dir = tmpdir.mkdir("train_{0}".format(i))
        data_prefix = str(train_dir.join("test1.data"))
        train_prefix = str(train_dir.join("test1.train"))
        data_src_file = os.path.join(test_data_dir, "src2.txt")
        data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
        args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
            data_src_file, data_tgt_file, data_prefix).split(' ')
        main(arguments=args)
        args_train = ["train", data_prefix, train_prefix] + \
            "--max_nb_iters 200 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
        if gpu is not None:
            args_train += ['--gpu', gpu]
        main(arguments=args_train)

        beam_search_eval_dir = tmpdir.mkdir(
            "eval_beam_search_{0}".format(i))
        beam_search_file = os.path.join(str(beam_search_eval_dir),
                                        'translations.txt')
        args_eval = ["eval",
                     train_prefix + '.train.config',
                     train_prefix + '.model.best.npz',
                     data_src_file,
                     beam_search_file] + \
            '--mode beam_search --beam_width 1'.split(' ')
        if gpu is not None:
            args_eval += ['--gpu', gpu]
        main(arguments=args_eval)

        greedy_search_eval_dir = tmpdir.mkdir(
            "eval_greedy_search_{0}".format(i))
        greedy_search_file = os.path.join(str(greedy_search_eval_dir),
                                          'translations.txt')
        args_eval = ["eval",
                     train_prefix + '.train.config',
                     train_prefix + '.model.best.npz',
                     data_src_file,
                     greedy_search_file] + \
            '--mode translate'.split(' ')
        if gpu is not None:
            args_eval += ['--gpu', gpu]
        main(arguments=args_eval)

        with open(beam_search_file) as f:
            beam_search_translations = f.readlines()
        with open(greedy_search_file) as f:
            greedy_search_translations = f.readlines()
        print("beam_search_translations")
        for p in beam_search_translations:
            print(p)
        print("greedy_search_translations")
        for p in greedy_search_translations:
            print(p)
        assert beam_search_translations == greedy_search_translations

def test_compare_beam_search_vs_diff_ensemble_search(self, tmpdir, gpu):
    """
    Compare beam_search and an ensemble_beam_search using 3 different
    models and check whether the translation results differ.  The results
    should differ most of the time, although in theory they could be
    equal.
    """
    for i in range(0, 4):
        print(i)
        test_data_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "../tests_data")
        train_dir = tmpdir.mkdir("train_{0}".format(i))
        data_prefix = str(train_dir.join("test1.data"))
        train_prefix = str(train_dir.join("test1.train"))
        data_src_file = os.path.join(test_data_dir, "src2.txt")
        data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
        args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
            data_src_file, data_tgt_file, data_prefix).split(' ')
        main(arguments=args)
        args_train = ["train", data_prefix, train_prefix] + \
            "--max_nb_iters 200 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
        if gpu is not None:
            args_train += ['--gpu', gpu]
        main(arguments=args_train)

    train_dir = str(tmpdir.join("train_0"))
    train_prefix = os.path.join(train_dir, "test1.train")
    beam_search_eval_dir = tmpdir.mkdir("eval_beam_search")
    beam_search_file = os.path.join(str(beam_search_eval_dir),
                                    'translations.txt')
    args_eval_beam_search = ["eval",
                             train_prefix + '.train.config',
                             train_prefix + '.model.best.npz',
                             data_src_file,
                             beam_search_file] + \
        '--mode beam_search --beam_width 30'.split(' ')
    if gpu is not None:
        args_eval_beam_search += ['--gpu', gpu]
    main(arguments=args_eval_beam_search)

    ensemble_search_eval_dir = tmpdir.mkdir("eval_ensemble_search")
    ensemble_search_file = os.path.join(str(ensemble_search_eval_dir),
                                        'translations.txt')
    train_dir_1 = str(tmpdir.join("train_1"))
    train_prefix_1 = os.path.join(train_dir_1, "test1.train")
    train_dir_2 = str(tmpdir.join("train_2"))
    train_prefix_2 = os.path.join(train_dir_2, "test1.train")
    train_dir_3 = str(tmpdir.join("train_3"))
    train_prefix_3 = os.path.join(train_dir_3, "test1.train")
    args_eval_ensemble_search = ["eval",
                                 train_prefix_1 + '.train.config',
                                 train_prefix_1 + '.model.best.npz',
                                 data_src_file,
                                 ensemble_search_file] + \
        ('--mode beam_search --beam_width 30 '
         '--additional_training_config {0} {1} '
         '--additional_trained_model {2} {3}').format(
            train_prefix_2 + '.train.config',
            train_prefix_3 + '.train.config',
            train_prefix_2 + '.model.best.npz',
            train_prefix_3 + '.model.best.npz').split(' ')
    if gpu is not None:
        args_eval_ensemble_search += ['--gpu', gpu]
    main(arguments=args_eval_ensemble_search)

    with open(beam_search_file) as f:
        beam_search_translations = f.readlines()
    with open(ensemble_search_file) as f:
        ensemble_search_translations = f.readlines()
    print("beam_search_translations")
    for p in beam_search_translations:
        print(p)
    print("ensemble_search_translations")
    for p in ensemble_search_translations:
        print(p)
    assert beam_search_translations != ensemble_search_translations

def test_config_saving(self, tmpdir, gpu):
    """
    Test that training can be restarted from a saved config file and that
    the saved config matches the original one (up to metadata and save
    paths).
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "../tests_data")
    train_dir = tmpdir.mkdir("train")
    data_prefix = str(train_dir.join("test1.data"))
    train_prefix = str(train_dir.join("test1.train"))
    data_src_file = os.path.join(test_data_dir, "src2.txt")
    data_tgt_file = os.path.join(test_data_dir, "tgt2.txt")
    args = 'make_data {0} {1} {2} --dev_src {0} --dev_tgt {1}'.format(
        data_src_file, data_tgt_file, data_prefix).split(' ')
    main(arguments=args)
    args_train = ["train", data_prefix, train_prefix] + \
        "--max_nb_iters 5 --mb_size 2 --Ei 10 --Eo 12 --Hi 30 --Ha 70 --Ho 15 --Hl 23".split(" ")
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)

    config_filename = train_prefix + ".train.config"
    train_prefix_2 = train_prefix + ".2"
    args_train = ["train", "--config", config_filename,
                  "--save_prefix", train_prefix_2]
    if gpu is not None:
        args_train += ['--gpu', gpu]
    main(arguments=args_train)
    config_filename2 = train_prefix_2 + ".train.config"

    import json
    with open(config_filename) as f:
        config1 = json.load(f)
    with open(config_filename2) as f:
        config2 = json.load(f)

    def compare_dict_except(d1, d2, except_fields=None):
        # Return True if d1 and d2 are equal, ignoring the keys listed in
        # except_fields (applied recursively to nested dictionaries).
        k_list_1 = set(d1.keys())
        k_list_2 = set(d2.keys())
        k_xor = (k_list_1 - k_list_2) | (k_list_2 - k_list_1)
        for k_diff in k_xor:
            if except_fields is None or k_diff not in except_fields:
                return False
        for k in k_list_1 & k_list_2:
            v1 = d1[k]
            if isinstance(v1, dict):
                compare_result = compare_dict_except(
                    d1[k], d2[k], except_fields=except_fields)
                if not compare_result:
                    return False
            else:
                if v1 != d2[k] and (except_fields is None or
                                    k not in except_fields):
                    return False
        return True

    assert compare_dict_except(
        config1, config2,
        except_fields="metadata save_prefix config".split())

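# To make the semantics of compare_dict_except concrete, a toy example
# with hypothetical values (illustrative sketch only -- the helper is
# local to test_config_saving, so it is not callable from here):
#
#   compare_dict_except({"a": 1, "b": 2}, {"a": 1, "b": 3},
#                       except_fields=["b"])         # -> True: "b" ignored
#   compare_dict_except({"a": 1}, {"a": 1, "b": 2})   # -> False: extra key "b"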