예제 #1
0
    def testSeqtoSeqWithLuongMulAttentionSaveAndLoad(self):
        """Train the toy NMT block, then verify a save/load round trip.

        The block file ``toy_nmt_luong_mul_attention.xml`` is loaded and
        trained, and its predictions are asserted to equal every line of
        ``tgt.txt`` with ``" EOS"`` appended. The model is then saved, loaded
        into a second, freshly initialized interface (which is never trained
        here), and that interface's predictions are checked the same way.
        """
        filename = "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        def build_interface():
            # The same configuration steps are applied to the trained
            # interface and to the one the saved model is loaded into.
            built = BasicInterface()
            built.load_file(block_filepath)
            built.set_variable("data_folder", data_filepath)
            built.set_variable("embedding_folder", embedding_filepath)
            built.initialize()
            return built

        trained = build_interface()

        # Context manager guarantees the handle is closed even if reading
        # the file raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        def check_predictions(predictions):
            # Predictions are token sequences; join them to compare against
            # the whitespace-separated gold sentences.
            self.assertEqual(len(gold_sentences), len(predictions))
            for gold, predicted_tokens in zip(gold_sentences, predictions):
                self.assertEqual(gold, " ".join(predicted_tokens))

        trained.train()
        check_predictions(trained.predict())

        model_filepath = filepath_handler.get_test_data_path(
            "stored_models/toy_nmt/luong")
        # Remove any model directory left over from a previous run before
        # saving to the same location.
        if os.path.exists(model_filepath):
            shutil.rmtree(model_filepath)

        trained.save(model_filepath)

        restored = build_interface()
        restored.load(model_filepath)

        check_predictions(restored.predict())
예제 #2
0
    def testSeqtoSeqWithVariableDecoderLayers(self):
        """Train the toy NMT block with ``lstm_layers`` set to "2" and then "1".

        For each setting a new interface is created from
        ``toy_nmt_variable_output_lstm_layers.xml``, trained, and its
        predictions are asserted to equal every line of ``tgt.txt`` with
        ``" EOS"`` appended.
        """
        filename = "full_ml_tests/toy_nmt/toy_nmt_variable_output_lstm_layers.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        # The gold file is the same for both configurations, so read it once.
        # The context manager closes the handle even if reading raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        # The value is passed as a string, exactly as the block variable
        # was set originally.
        for lstm_layers in ("2", "1"):
            interface = BasicInterface()
            interface.load_file(block_filepath)
            interface.set_variable("data_folder", data_filepath)
            interface.set_variable("embedding_folder", embedding_filepath)
            interface.set_variable("lstm_layers", lstm_layers)
            interface.initialize()

            interface.train()
            predictions = interface.predict()

            self.assertEqual(len(gold_sentences), len(predictions))

            for gold, predicted_tokens in zip(gold_sentences, predictions):
                self.assertEqual(gold, " ".join(predicted_tokens))
    def testReverseSequenceTaskWithCombinedVocab(self):
        """Train the ``pointer_plus_decoder.xml`` block and check its output.

        Uses the data under ``nmt/reverse_sequence_plus_vocab/`` and asserts
        that, after training, the predictions equal every line of ``tgt.txt``
        with ``" EOS"`` appended.
        """
        filename = "full_ml_tests/pointer_network/pointer_plus_decoder.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path(
            "nmt/reverse_sequence_plus_vocab/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # Context manager guarantees the handle is closed even if reading
        # the file raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        # Each prediction is a token sequence; join it to compare against
        # the whitespace-separated gold sentence.
        for gold, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold, " ".join(predicted_tokens))
예제 #4
0
    def testVariationalLuongMulAttentionWithConditionalPrior(self):
        """Train ``variational_toy_nmt_conditional_prior.xml`` and check output.

        Uses the toy NMT data and asserts that, after training, the
        predictions equal every line of ``tgt.txt`` with ``" EOS"`` appended.
        """
        filename = "full_ml_tests/toy_nmt/variational_toy_nmt_conditional_prior.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # Context manager guarantees the handle is closed even if reading
        # the file raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        # Each prediction is a token sequence; join it to compare against
        # the whitespace-separated gold sentence.
        for gold, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold, " ".join(predicted_tokens))
예제 #5
0
    def testSeqtoSeqWithSgdLrDecease(self):
        """Check the learning rate across training calls, then the predictions.

        The learning-rate variable is taken from the ``"upd"`` execution
        component of ``toy_nmt_sgd_learning_rate_decay.xml`` and evaluated
        through the interface's TensorFlow session: once before training and
        once after each of three ``train(iterations=...)`` calls. A final
        ``train()`` without arguments is followed by asserting that the
        predictions equal every line of ``tgt.txt`` with ``" EOS"`` appended.
        """
        filename = "full_ml_tests/toy_nmt/toy_nmt_sgd_learning_rate_decay.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # Context manager guarantees the handle is closed even if reading
        # the file raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        upd_component = interface.get_execution_component("upd")
        learning_rate_variable = upd_component.value_model.get_learning_rate()

        def current_learning_rate():
            # Evaluate the variable with an empty feed dict.
            return interface.ml_helper.tensorflow_session_model.run(
                learning_rate_variable, {})

        self.assertAlmostEqual(0.1, current_learning_rate())

        # (iterations passed to train, learning rate expected afterwards),
        # applied in order on the same interface.
        # NOTE(review): the expected values depend on the decay settings in
        # the block file, which is not visible here.
        schedule = (
            (10, 0.1),
            (1991, 0.0999),
            (1, 0.0998001),
        )
        for iterations, expected_rate in schedule:
            interface.train(iterations=iterations)
            self.assertAlmostEqual(expected_rate, current_learning_rate())

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        for gold, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold, " ".join(predicted_tokens))
예제 #6
0
    def testPredictWithoutTraining(self):
        """Call ``predict()`` on an initialized interface that was never trained.

        Loads ``pure_relation_prediction.xml`` with the ``toy-125`` data
        folder and the GloVe embedding file, then asserts that 51 predictions
        come back and that the first one contains ``"->"``.
        """
        model = BasicInterface()

        paths = self.setup_holder.filepath_handler
        xml_path = paths.get_test_block_path(
            "full_ml_tests/relation_prediction/pure_relation_prediction.xml")
        data_dir = paths.get_test_data_path("relation_prediction/toy-125/")
        glove_path = paths.get_test_data_path("embeddings/glove.6B.100d.txt")

        model.load_file(xml_path)
        for variable_name, variable_value in (
                ("data_folder", data_dir),
                ("embedding_filepath", glove_path)):
            model.set_variable(variable_name, variable_value)
        model.initialize()

        results = model.predict()

        self.assertEqual(51, len(results))
        self.assertIn("->", results[0])
예제 #7
0
    def testSeqtoSeqWithUnkReplacement(self):
        """Train ``toy_nmt_replace_unks.xml`` and count UNK / wrong tokens.

        After training, every predicted token at a gold position is compared
        with the gold token from ``tgt.txt`` (each gold line has ``" EOS"``
        appended). The test asserts that no predicted token is ``"UNK"`` and
        that fewer than 10 tokens differ from the gold token.
        """
        filename = "full_ml_tests/toy_nmt/toy_nmt_replace_unks.xml"

        filepath_handler = self.setup_holder.filepath_handler
        block_filepath = filepath_handler.get_test_block_path(filename)
        data_filepath = filepath_handler.get_test_data_path("nmt/toy/")
        embedding_filepath = filepath_handler.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.add_console_logger({"formatting": ["token_replacement"]})
        interface.initialize()

        # Context manager guarantees the handle is closed even if reading
        # the file raises.
        with open(data_filepath + "tgt.txt") as f:
            gold_sentences = [line.strip() + " EOS" for line in f]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        unk_count = 0
        missed_count = 0

        for gold, predicted_tokens in zip(gold_sentences, predictions):
            for j, gold_token in enumerate(gold.split(" ")):
                # Indexing (rather than zipping) keeps the IndexError when a
                # prediction is shorter than its gold sentence.
                predicted = predicted_tokens[j]
                if predicted == "UNK":
                    unk_count += 1
                elif predicted != gold_token:
                    missed_count += 1

        self.assertEqual(0, unk_count)
        # Passes only while fewer than 10 tokens are wrong.
        self.assertGreater(10, missed_count)
예제 #8
0
    def testPredictEmbeddings(self):
        """Check the nested lengths of the output of ``predict_embeddings.xml``.

        Loads the block with the ``toy-125`` data folder and the GloVe
        embedding file, calls ``predict()`` without training, and asserts:
        51 top-level entries, lengths 81 and 60 for the first two entries,
        and length 181 for the first element of the first entry.
        """
        model = BasicInterface()

        paths = self.setup_holder.filepath_handler
        xml_path = paths.get_test_block_path(
            "full_ml_tests/relation_prediction/predict_embeddings.xml")
        data_dir = paths.get_test_data_path("relation_prediction/toy-125/")
        glove_path = paths.get_test_data_path("embeddings/glove.6B.100d.txt")

        model.load_file(xml_path)
        for variable_name, variable_value in (
                ("data_folder", data_dir),
                ("embedding_filepath", glove_path)):
            model.set_variable(variable_name, variable_value)
        model.initialize()

        embeddings = model.predict()

        self.assertEqual(51, len(embeddings))

        first_entry = embeddings[0]
        self.assertEqual(81, len(first_entry))
        self.assertEqual(60, len(embeddings[1]))

        self.assertEqual(181, len(embeddings[0][0]))