def testSeqtoSeqWithLuongMulAttentionSaveAndLoad(self):
    """Train, predict, save, reload and predict again on the toy NMT data.

    Loads the block file ``toy_nmt_luong_mul_attention.xml``, trains it and
    checks that every prediction, joined with single spaces, equals the
    matching stripped line of ``tgt.txt`` with ``" EOS"`` appended. The
    model is then saved, loaded into a second freshly initialized
    interface, and the same check is repeated on that interface's
    predictions.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    def build_interface():
        # Both the trained and the reloaded interface are configured identically.
        new_interface = BasicInterface()
        new_interface.load_file(block_filepath)
        new_interface.set_variable("data_folder", data_filepath)
        new_interface.set_variable("embedding_folder", embedding_filepath)
        new_interface.initialize()
        return new_interface

    interface = build_interface()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    def assert_predictions_match_gold(predictions):
        self.assertEqual(len(gold_sentences), len(predictions))
        for i, gold_sentence in enumerate(gold_sentences):
            self.assertEqual(gold_sentence, " ".join(predictions[i]))

    interface.train()
    assert_predictions_match_gold(interface.predict())

    model_filepath = filepath_handler.get_test_data_path(
        "stored_models/toy_nmt/luong")

    # Remove any model directory left at this path before saving.
    if os.path.exists(model_filepath):
        shutil.rmtree(model_filepath)

    interface.save(model_filepath)

    interface_2 = build_interface()
    interface_2.load(model_filepath)
    assert_predictions_match_gold(interface_2.predict())
def testSeqtoSeqWithVariableDecoderLayers(self):
    """Train and predict on the toy NMT data with two ``lstm_layers`` values.

    Loads the block file ``toy_nmt_variable_output_lstm_layers.xml`` into a
    fresh interface once with the variable ``lstm_layers`` set to ``"2"``
    and once with ``"1"``. Each time the interface is trained and every
    prediction, joined with single spaces, must equal the matching stripped
    line of ``tgt.txt`` with ``" EOS"`` appended.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_variable_output_lstm_layers.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    # The gold file is the same for both configurations, so it is read once.
    # The context manager closes the handle even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    # Same order as the original hand-unrolled scenario: "2" first, then "1".
    for lstm_layers in ("2", "1"):
        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.set_variable("lstm_layers", lstm_layers)
        interface.initialize()

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))
        for i, gold_sentence in enumerate(gold_sentences):
            self.assertEqual(gold_sentence, " ".join(predictions[i]))
def testReverseSequenceTaskWithCombinedVocab(self):
    """Train and predict with the ``pointer_plus_decoder.xml`` block.

    Uses the data folder ``nmt/reverse_sequence_plus_vocab/``. After
    training, every prediction, joined with single spaces, must equal the
    matching stripped line of ``tgt.txt`` with ``" EOS"`` appended.
    """
    filename = "full_ml_tests/pointer_network/pointer_plus_decoder.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path(
        "nmt/reverse_sequence_plus_vocab/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold_sentence in enumerate(gold_sentences):
        self.assertEqual(gold_sentence, " ".join(predictions[i]))
def testVariationalLuongMulAttentionWithConditionalPrior(self):
    """Train and predict with ``variational_toy_nmt_conditional_prior.xml``.

    Uses the toy NMT data folder ``nmt/toy/``. After training, every
    prediction, joined with single spaces, must equal the matching stripped
    line of ``tgt.txt`` with ``" EOS"`` appended.
    """
    filename = "full_ml_tests/toy_nmt/variational_toy_nmt_conditional_prior.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold_sentence in enumerate(gold_sentences):
        self.assertEqual(gold_sentence, " ".join(predictions[i]))
def testSeqtoSeqWithSgdLrDecease(self):
    """Check the learning rate at several training points, then predictions.

    Loads the block file ``toy_nmt_sgd_learning_rate_decay.xml`` and reads
    the learning rate exposed by the ``"upd"`` execution component. The
    value is asserted to be approximately:

    * ``0.1`` right after initialization,
    * ``0.1`` after 10 training iterations,
    * ``0.0999`` after a further 1991 iterations,
    * ``0.0998001`` after one more iteration.

    Finally the interface is trained with default arguments and every
    prediction, joined with single spaces, must equal the matching stripped
    line of ``tgt.txt`` with ``" EOS"`` appended.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_sgd_learning_rate_decay.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    upd_component = interface.get_execution_component("upd")
    learning_rate_variable = upd_component.value_model.get_learning_rate()

    def current_learning_rate():
        # Evaluate the learning-rate variable in the interface's session,
        # with an empty feed dict as in every original call site.
        return interface.ml_helper.tensorflow_session_model.run(
            learning_rate_variable, {})

    self.assertAlmostEqual(0.1, current_learning_rate())

    interface.train(iterations=10)
    self.assertAlmostEqual(0.1, current_learning_rate())

    interface.train(iterations=1991)
    self.assertAlmostEqual(0.0999, current_learning_rate())

    interface.train(iterations=1)
    self.assertAlmostEqual(0.0998001, current_learning_rate())

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))
    for i, gold_sentence in enumerate(gold_sentences):
        self.assertEqual(gold_sentence, " ".join(predictions[i]))
def testPredictWithoutTraining(self):
    """Predict with an initialized interface on which train() was never called.

    Loads ``pure_relation_prediction.xml`` with the ``toy-125`` data folder
    and expects 51 predictions, the first of which contains ``"->"``.
    """
    interface = BasicInterface()

    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/relation_prediction/pure_relation_prediction.xml")
    data_path = self.setup_holder.filepath_handler.get_test_data_path(
        "relation_prediction/toy-125/")
    embedding_path = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/glove.6B.100d.txt")

    interface.load_file(block_path)
    interface.set_variable("data_folder", data_path)
    interface.set_variable("embedding_filepath", embedding_path)
    interface.initialize()

    # No call to train() here: predict() runs straight after initialize().
    predicted = interface.predict()

    self.assertEqual(51, len(predicted))
    self.assertIn("->", predicted[0])
def testSeqtoSeqWithUnkReplacement(self):
    """Train ``toy_nmt_replace_unks.xml`` and count UNKs and mismatches.

    A console logger with ``{"formatting": ["token_replacement"]}`` is
    registered before initialization. After training, each predicted token
    is compared with the gold token at the same position (gold sentences
    are the stripped lines of ``tgt.txt`` with ``" EOS"`` appended, split
    on single spaces). The test requires that no compared prediction is
    ``"UNK"`` and that fewer than 10 compared tokens differ from gold.

    Only positions that exist in the gold sentence are compared; extra
    predicted tokens are ignored.
    """
    filename = "full_ml_tests/toy_nmt/toy_nmt_replace_unks.xml"
    filepath_handler = self.setup_holder.filepath_handler
    block_filepath = filepath_handler.get_test_block_path(filename)
    data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
    embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

    interface = BasicInterface()
    interface.load_file(block_filepath)
    interface.set_variable("data_folder", data_filepath)
    interface.set_variable("embedding_folder", embedding_filepath)
    interface.add_console_logger({"formatting": ["token_replacement"]})
    interface.initialize()

    # Context manager guarantees the handle is closed even if reading fails.
    with open(data_filepath + "tgt.txt") as f:
        gold_sentences = [line.strip() + " EOS" for line in f]

    interface.train()
    predictions = interface.predict()

    self.assertEqual(len(gold_sentences), len(predictions))

    unk_count = 0
    missed_count = 0
    for i, gold_sentence in enumerate(gold_sentences):
        for j, gold_token in enumerate(gold_sentence.split(" ")):
            # NOTE(review): indexing assumes each prediction has at least
            # as many tokens as its gold sentence; a shorter prediction
            # raises IndexError here rather than counting as a miss.
            predicted_token = predictions[i][j]
            if predicted_token == "UNK":
                unk_count += 1
            elif predicted_token != gold_token:
                missed_count += 1

    self.assertEqual(0, unk_count)
    self.assertGreater(10, missed_count)
def testPredictEmbeddings(self):
    """Predict with ``predict_embeddings.xml`` and check the result sizes.

    Loads the block with the ``toy-125`` data folder, initializes it and
    calls predict() without training. The result must have 51 entries; the
    first entry must have length 81, the second length 60, and the first
    element of the first entry length 181.
    """
    interface = BasicInterface()

    block_path = self.setup_holder.filepath_handler.get_test_block_path(
        "full_ml_tests/relation_prediction/predict_embeddings.xml")
    data_path = self.setup_holder.filepath_handler.get_test_data_path(
        "relation_prediction/toy-125/")
    embedding_path = self.setup_holder.filepath_handler.get_test_data_path(
        "embeddings/glove.6B.100d.txt")

    interface.load_file(block_path)
    interface.set_variable("data_folder", data_path)
    interface.set_variable("embedding_filepath", embedding_path)
    interface.initialize()

    embeddings = interface.predict()

    # Outer length first, then the two leading entries, then one inner element.
    self.assertEqual(51, len(embeddings))
    self.assertEqual(81, len(embeddings[0]))
    self.assertEqual(60, len(embeddings[1]))
    self.assertEqual(181, len(embeddings[0][0]))