Example #1
0
    def testSeqtoSeqWithLuongMulAttentionParameterCount(self):
        # Initializes the model described by the toy_nmt_luong_mul_attention
        # block file and checks the parameter count reported by the ML helper.
        paths = self.setup_holder.filepath_handler
        block_filepath = paths.get_test_block_path(
            "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml")
        data_filepath = paths.get_test_data_path("nmt/toy/")
        embedding_filepath = paths.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        for variable_name, value in (("data_folder", data_filepath),
                                     ("embedding_folder", embedding_filepath)):
            interface.set_variable(variable_name, value)
        interface.initialize()

        parameter_count = interface.ml_helper.count_parameters()
        self.assertEqual(376254, parameter_count)
    def testParameterSearchCorrectCountSearchOptions(self):
        # Checks the number of search options reported for the iris
        # parameter-search block: 7 with greedy=True and 24 with greedy=False.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "iris_tests/full_iris_with_parameter_search.xml")
        data_filepath = paths.get_test_block_path("iris_tests")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)

        for greedy, expected_count in ((True, 7), (False, 24)):
            option_count = interface.count_search_options(greedy=greedy)
            self.assertEqual(expected_count, option_count)
    def testParameterSearchFullRuns(self):
        # Runs a non-greedy search over the iris parameter-search block and
        # checks that exactly three variables are reported as affected:
        # batch_size, inner_dim and dropout.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "iris_tests/full_iris_with_parameter_search.xml")
        data_filepath = paths.get_test_block_path("iris_tests")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)

        configuration = interface.search(greedy=False,
                                         minimize_valid_score=False)
        affected = configuration.get_affected_variables()

        self.assertEqual(3, len(affected))
        for variable_name in ("batch_size", "inner_dim", "dropout"):
            self.assertIn(variable_name, affected)
    def testParameterSearchRunsNormallyUsingFirst(self):
        # Trains the iris parameter-search block without running a search and
        # checks the evaluation score plus the batch size, transform shape and
        # dropout rate the components ended up with.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "iris_tests/full_iris_with_parameter_search.xml")
        data_filepath = paths.get_test_block_path("iris_tests")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.initialize()
        interface.train()

        score = interface.evaluate()
        self.assertGreaterEqual(1.0, score)
        self.assertLess(0.9, score)

        batch_generator = interface.get_execution_component("batch_generator")
        batch_size = batch_generator.get_value_model().get_batch_size()
        self.assertEqual(10, batch_size)

        mlp_component = interface.get_execution_component("mlp")
        transform_shape = mlp_component.get_value_model().get_transform_shape()
        self.assertEqual(10, transform_shape[1])
        dropout_rate = mlp_component.get_value_model().get_dropout_rate()
        self.assertEqual(0.1, dropout_rate)
    def testProfileTrain(self):
        # Trains the full_iris model with profiling enabled, then checks the
        # evaluation score and that a timeline.json file exists in the
        # profiling log directory.
        interface = BasicInterface()

        filename = "iris_tests/full_iris.xml"
        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            "iris_tests")

        profile_dir = self.setup_holder.filepath_handler.get_test_data_path(
            "test_output/logs")
        timeline_path = os.path.join(profile_dir, "timeline.json")

        # A timeline left behind by an earlier run would let the final
        # existence check pass even if this run wrote nothing.
        if os.path.isfile(timeline_path):
            os.remove(timeline_path)

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.initialize(profile=True, log_dir=profile_dir)

        interface.train()
        performance = interface.evaluate()

        self.assertGreaterEqual(1.0, performance)
        self.assertLess(0.9, performance)

        self.assertTrue(os.path.exists(timeline_path))
    def testIrisTrainAndEvaluate(self):
        # Trains the model from the full_iris block file and checks that the
        # evaluation score is greater than 0.9 and at most 1.0.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path("iris_tests/full_iris.xml")
        data_filepath = paths.get_test_block_path("iris_tests")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.initialize()
        interface.train()

        score = interface.evaluate()
        self.assertGreaterEqual(1.0, score)
        self.assertLess(0.9, score)
Example #7
0
    def testSeqtoSeqWithSgdLrDecease(self):
        # Checks the learning rate exposed by the "upd" component at several
        # points during training, then finishes training and compares the
        # predictions with the target sentences from tgt.txt.
        filename = "full_ml_tests/toy_nmt/toy_nmt_sgd_learning_rate_decay.xml"

        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "nmt/toy/")
        embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # The context manager closes the file even if reading fails.
        with open(data_filepath + "tgt.txt") as target_file:
            gold_sentences = [line.strip() + " EOS" for line in target_file]

        upd_component = interface.get_execution_component("upd")
        learning_rate_variable = upd_component.value_model.get_learning_rate()

        def current_learning_rate():
            # Look the session model up on every call rather than caching it.
            return interface.ml_helper.tensorflow_session_model.run(
                learning_rate_variable, {})

        self.assertAlmostEqual(0.1, current_learning_rate())

        # (iterations to train, learning rate expected afterwards)
        schedule = ((10, 0.1), (1991, 0.0999), (1, 0.0998001))
        for iterations, expected_rate in schedule:
            interface.train(iterations=iterations)
            self.assertAlmostEqual(expected_rate, current_learning_rate())

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        for gold_sentence, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold_sentence, " ".join(predicted_tokens))
    def testIrisSaveAndLoad(self):
        # Trains and saves the full_iris_no_shuffling model, loads it into a
        # second interface, and checks that both evaluate above 0.9 and at
        # most 1.0.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "iris_tests/full_iris_no_shuffling.xml")
        data_filepath = paths.get_test_block_path("iris_tests")
        model_filepath = paths.get_test_data_path(
            "stored_models/iris/iris.model")

        # Remove whatever a previous run stored at the model path.
        if os.path.exists(model_filepath):
            shutil.rmtree(model_filepath)

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.initialize()
        interface.train()

        trained_score = interface.evaluate()
        self.assertGreaterEqual(1.0, trained_score)
        self.assertLess(0.9, trained_score)

        interface.save(model_filepath)

        restored = BasicInterface()
        restored.load_file(block_filepath)
        restored.set_variable("data_folder", data_filepath)
        restored.initialize()
        restored.load(model_filepath)

        restored_score = restored.evaluate()
        self.assertGreaterEqual(1.0, restored_score)
        self.assertLess(0.9, restored_score)
Example #9
0
    def testTrainAndValidateMarginLoss(self):
        # Trains the pure_relation_prediction_margin_loss block on the toy-125
        # data and checks that the validation value is below 0.25.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "full_ml_tests/relation_prediction/pure_relation_prediction_margin_loss.xml")
        data_filepath = paths.get_test_data_path(
            "relation_prediction/toy-125/")
        embedding_filepath = paths.get_test_data_path(
            "embeddings/glove.6B.100d.txt")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_filepath", embedding_filepath)
        interface.initialize()
        interface.train()

        validation_value = interface.validate()
        self.assertGreater(0.25, validation_value)
Example #10
0
    def testPredictWithoutTraining(self):
        # Calls predict on the pure_relation_prediction block without training
        # and checks that 51 predictions come back and that the first one
        # contains "->".
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "full_ml_tests/relation_prediction/pure_relation_prediction.xml")
        data_filepath = paths.get_test_data_path(
            "relation_prediction/toy-125/")
        embedding_filepath = paths.get_test_data_path(
            "embeddings/glove.6B.100d.txt")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_filepath", embedding_filepath)
        interface.initialize()

        predictions = interface.predict()
        self.assertEqual(51, len(predictions))
        self.assertIn("->", predictions[0])
Example #11
0
    def testPredictEmbeddings(self):
        # Calls predict on the predict_embeddings block and checks the lengths
        # of the result: 51 entries, 81 and 60 items in the first two entries,
        # and 181 values in the first item of the first entry.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "full_ml_tests/relation_prediction/predict_embeddings.xml")
        data_filepath = paths.get_test_data_path(
            "relation_prediction/toy-125/")
        embedding_filepath = paths.get_test_data_path(
            "embeddings/glove.6B.100d.txt")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_filepath", embedding_filepath)
        interface.initialize()

        embeddings = interface.predict()

        self.assertEqual(51, len(embeddings))

        first_entry = embeddings[0]
        self.assertEqual(81, len(first_entry))
        self.assertEqual(60, len(embeddings[1]))

        self.assertEqual(181, len(embeddings[0][0]))
Example #12
0
    def testSeqtoSeqWithLuongKVAttention(self):
        # Trains the toy_nmt_luong_kv_attention block and checks that every
        # prediction, joined with spaces, equals the matching line of tgt.txt
        # followed by " EOS".
        filename = "full_ml_tests/toy_nmt/toy_nmt_luong_kv_attention.xml"

        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "nmt/toy/")
        embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # The context manager closes the file even if reading fails.
        with open(data_filepath + "tgt.txt") as target_file:
            gold_sentences = [line.strip() + " EOS" for line in target_file]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        for gold_sentence, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold_sentence, " ".join(predicted_tokens))
Example #13
0
    def testSeqtoSeqWithLuongMulAttentionSaveAndLoad(self):
        # Trains the toy_nmt_luong_mul_attention block, saves it, loads it into
        # a second interface, and checks that both interfaces reproduce the
        # target sentences from tgt.txt.
        filename = "full_ml_tests/toy_nmt/toy_nmt_luong_mul_attention.xml"

        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "nmt/toy/")
        embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # The context manager closes the file even if reading fails.
        with open(data_filepath + "tgt.txt") as target_file:
            gold_sentences = [line.strip() + " EOS" for line in target_file]

        def check_predictions(predictions):
            # Same comparison for the trained and the reloaded model.
            self.assertEqual(len(gold_sentences), len(predictions))
            for gold_sentence, predicted_tokens in zip(gold_sentences,
                                                       predictions):
                self.assertEqual(gold_sentence, " ".join(predicted_tokens))

        interface.train()
        check_predictions(interface.predict())

        model_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "stored_models/toy_nmt/luong")
        # Remove whatever a previous run stored at the model path.
        if os.path.exists(model_filepath):
            shutil.rmtree(model_filepath)

        interface.save(model_filepath)

        interface_2 = BasicInterface()
        interface_2.load_file(block_filepath)
        interface_2.set_variable("data_folder", data_filepath)
        interface_2.set_variable("embedding_folder", embedding_filepath)
        interface_2.initialize()
        interface_2.load(model_filepath)

        check_predictions(interface_2.predict())
Example #14
0
    def testBasicSeqtoSeqPerplexity(self):
        # Validates the basic_toy_nmt_with_perplexity block before and after
        # training: the first value must exceed 50, the second must lie
        # strictly between 1 and 1.01.
        paths = self.setup_holder.filepath_handler
        block_filepath = paths.get_test_block_path(
            "full_ml_tests/toy_nmt/basic_toy_nmt_with_perplexity.xml")
        data_filepath = paths.get_test_data_path("nmt/toy/")
        embedding_filepath = paths.get_test_data_path("embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        before_training = interface.validate()
        interface.train()
        after_training = interface.validate()

        self.assertLess(50, before_training)
        self.assertLess(1, after_training)
        self.assertGreater(1.01, after_training)
Example #15
0
    def testSeqtoSeqWithUnkReplacement(self):
        # Trains the toy_nmt_replace_unks block and compares the predictions
        # with tgt.txt token by token: no "UNK" token may remain and fewer
        # than 10 tokens may differ from the target.
        filename = "full_ml_tests/toy_nmt/toy_nmt_replace_unks.xml"

        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "nmt/toy/")
        embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.add_console_logger({"formatting": ["token_replacement"]})
        interface.initialize()

        # The context manager closes the file even if reading fails.
        with open(data_filepath + "tgt.txt") as target_file:
            gold_sentences = [line.strip() + " EOS" for line in target_file]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        unk_count = 0
        missed_count = 0

        for gold_sentence, predicted_tokens in zip(gold_sentences, predictions):
            for position, gold_token in enumerate(gold_sentence.split(" ")):
                # Indexed by gold position on purpose: a prediction shorter
                # than the target still raises IndexError, as before.
                predicted_token = predicted_tokens[position]
                if predicted_token == "UNK":
                    unk_count += 1
                elif predicted_token != gold_token:
                    missed_count += 1

        self.assertEqual(0, unk_count)
        self.assertGreater(10, missed_count)
    def testParameterSearchCanApply(self):
        # Runs a greedy search on the iris parameter-search block, applies the
        # resulting configuration, trains, and checks that the evaluation
        # score is greater than 0.9 and at most 1.0.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path(
            "iris_tests/full_iris_with_parameter_search.xml")
        data_filepath = paths.get_test_block_path("iris_tests")

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)

        best_configuration = interface.search(greedy=True,
                                              minimize_valid_score=False)
        interface.apply_search_configuration(best_configuration)
        interface.train()

        score = interface.evaluate()
        self.assertGreaterEqual(1.0, score)
        self.assertLess(0.9, score)
    def testLoggerSavesLog(self):
        # Attaches a file logger to the full_iris model, trains and evaluates
        # it, and checks that the log file exists afterwards.
        interface = BasicInterface()
        paths = self.setup_holder.filepath_handler

        block_filepath = paths.get_test_block_path("iris_tests/full_iris.xml")
        data_filepath = paths.get_test_block_path("iris_tests")
        logger_filepath = paths.get_test_data_path(
            "test_output/logs/iris/log.txt")
        log_dir_filepath = paths.get_test_data_path("test_output/logs/iris/")

        # Start from a clean log directory.
        if os.path.exists(log_dir_filepath):
            shutil.rmtree(log_dir_filepath)

        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.initialize()

        interface.add_file_logger(
            {
                "training": ["status", "loss", "parameters"],
                "validation": ["all"]
            },
            logger_filepath)

        interface.train()
        score = interface.evaluate()

        self.assertGreaterEqual(1.0, score)
        self.assertLess(0.9, score)

        self.assertTrue(os.path.exists(logger_filepath))
    def testReverseSequenceTaskWithCombinedVocab(self):
        # Trains the pointer_plus_decoder block on the
        # reverse_sequence_plus_vocab data and checks that every prediction,
        # joined with spaces, equals the matching line of tgt.txt followed by
        # " EOS".
        filename = "full_ml_tests/pointer_network/pointer_plus_decoder.xml"

        block_filepath = self.setup_holder.filepath_handler.get_test_block_path(
            filename)
        data_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "nmt/reverse_sequence_plus_vocab/")
        embedding_filepath = self.setup_holder.filepath_handler.get_test_data_path(
            "embeddings/")

        interface = BasicInterface()
        interface.load_file(block_filepath)
        interface.set_variable("data_folder", data_filepath)
        interface.set_variable("embedding_folder", embedding_filepath)
        interface.initialize()

        # The context manager closes the file even if reading fails.
        with open(data_filepath + "tgt.txt") as target_file:
            gold_sentences = [line.strip() + " EOS" for line in target_file]

        interface.train()
        predictions = interface.predict()

        self.assertEqual(len(gold_sentences), len(predictions))

        for gold_sentence, predicted_tokens in zip(gold_sentences, predictions):
            self.assertEqual(gold_sentence, " ".join(predicted_tokens))
Example #19
0
from Mindblocks.interface import BasicInterface

# Paths handed to the interface below.
block_filepath = "blocks/graph_free/deep_elmo_relation_prediction.block"
embedding_filepath = "data/glove.6B/glove.6B.100d.txt"
data_filepath = "data/webquestions_small_valid"

# Module-level interface: load the block file, set its two variables, then
# initialize it.
interface = BasicInterface()
interface.load_file(block_filepath)
for _variable_name, _variable_value in (
        ("embedding_filepath", embedding_filepath),
        ("data_folder", data_filepath)):
    interface.set_variable(_variable_name, _variable_value)
interface.initialize()


def print_test():
    """Write the module-level interface's predictions to output.txt.

    Entries are separated by newlines, with no trailing newline. An entry
    that is not None is written as its items 0, 2 and 3 joined by tabs; a
    None entry is written as the text "None".
    """
    with open("output.txt", 'w') as output_file:
        for position, line in enumerate(interface.predict()):
            # Separator goes before every entry except the first.
            if position > 0:
                print("", file=output_file)
            if line is None:
                rendered = line
            else:
                rendered = "\t".join([line[0], line[2], line[3]])
            print(rendered, file=output_file, end="")


def print_debug():
    first = True
Example #20
0
from Mindblocks.interface import BasicInterface

# Create an interface and call its make_gui method.
controller = BasicInterface()
controller.make_gui()