예제 #1
0
    def test_sequential(self):
        """Train NC models with sequentially ordered train nodes, running
        every generated config both with and without partitioned evaluation."""
        name = "sequential_ordering"
        dataset_dir = self.output_dir / Path(name)

        # Small synthetic node-classification graph split across 8 partitions.
        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=500,
                                num_edges=10000,
                                num_rels=1,
                                splits=[.1, .05, .05],
                                num_partitions=8,
                                partitioned_eval=True,
                                sequential_train_nodes=True,
                                feature_dim=10,
                                task="nc")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=[
                                         "gs_1_layer_emb", "gs_3_layer_emb",
                                         "gs_1_layer", "gs_3_layer"
                                     ],
                                     storage_names=["part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")

        # Same configs, first evaluating over partitions, then without.
        for part_eval in (True, False):
            run_configs(dataset_dir,
                        partitioned_eval=part_eval,
                        sequential_train_nodes=True)
예제 #2
0
    def init_dataset_dir(self, name):
        """Generate a small random LP dataset and configs under *name*,
        then train each generated config once."""
        dataset_dir = Path(self.base_dir) / name

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=10,
                                splits=[.9, .05, .05],
                                task="lp")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        # Train once per generated marius config ("M-"-prefixed files);
        # self.config_file ends up pointing at the last one processed.
        for filename in os.listdir(dataset_dir):
            if filename.startswith("M-"):
                self.config_file = dataset_dir / filename
                config = m.config.loadConfig(str(self.config_file), True)
                m.manager.marius_train(config)
예제 #3
0
    def setUp(self):
        """Create the temp test dir, generate a random LP dataset plus
        configs, and run one training pass before each test."""
        # Normalize to a Path immediately: the original kept TMP_TEST_DIR as a
        # plain string and only worked via Path.__rtruediv__. mkdir with
        # exist_ok avoids the racy exists()/mkdir() pair.
        base_dir = Path(TMP_TEST_DIR)
        base_dir.mkdir(parents=True, exist_ok=True)

        name = "basic_lp"
        dataset_dir = base_dir / name
        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=10,
                                splits=[.9, .05, .05],
                                task="lp")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        # Locate the generated "M-" config (last match wins, as before).
        # Initializing to None turns a missing config into a clear failure
        # instead of an AttributeError on self.config_file below.
        self.config_file = None
        for filename in os.listdir(dataset_dir):
            if filename.startswith("M-"):
                self.config_file = dataset_dir / filename
        assert self.config_file is not None, "no generated M- config found"

        config = m.config.loadConfig(str(self.config_file), True)
        m.manager.marius_train(config)
예제 #4
0
    def test_partitioned_eval(self):
        """Partitioned LP evaluation across all evaluators, then a second run
        resuming from the model dir, verifying saved model parameters."""
        name = "partitioned_eval"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=1,
                                splits=[.9, .05, .05],
                                num_partitions=8,
                                partitioned_eval=True,
                                task="lp")

        generate_configs_for_dataset(
            dataset_dir,
            model_names=["distmult"],
            storage_names=["part_buffer"],
            training_names=["sync"],
            evaluation_names=["sync", "async", "async_deg", "async_filtered"],
            task="lp")

        run_configs(dataset_dir, partitioned_eval=True)

        # Re-run pointing at the previous output dir, then check that model
        # parameters were written (task "lp", no node embeddings expected).
        model_dir_path = dataset_dir
        run_configs(dataset_dir, str(model_dir_path))
        ret, err = has_model_params(model_dir_path, "lp", False)
        assert ret, err  # E712 fix: truthiness check instead of `== True`
예제 #5
0
    def test_partitioned_eval(self):
        """Partitioned NC evaluation over GraphSage variants, then a second
        run resuming from the model dir, verifying saved model parameters."""
        name = "partitioned_eval"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=500,
                                num_edges=10000,
                                num_rels=10,
                                splits=[.9, .05, .05],
                                num_partitions=8,
                                partitioned_eval=True,
                                feature_dim=10,
                                task="nc")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=[
                                         "gs_1_layer_emb", "gs_3_layer_emb",
                                         "gs_1_layer", "gs_3_layer"
                                     ],
                                     storage_names=["part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")

        run_configs(dataset_dir, partitioned_eval=True)

        # Re-run pointing at the previous output dir, then check that model
        # parameters were written (task "nc", embeddings expected).
        model_dir_path = dataset_dir
        run_configs(dataset_dir, str(model_dir_path))
        ret, err = has_model_params(model_dir_path, "nc", True)
        assert ret, err  # E712 fix: truthiness check instead of `== True`
예제 #6
0
    def test_dm(self):
        """DistMult link prediction on a copy of the shared test graph."""
        name = "dm"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        run_configs(dataset_dir)
예제 #7
0
    def test_gs(self):
        """GraphSage node classification (1- and 3-layer) on a copy of the
        shared test graph."""
        name = "gs"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["gs_1_layer", "gs_3_layer"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")

        run_configs(dataset_dir)
예제 #8
0
    def test_async(self):
        """Async training and evaluation for NC with a partition buffer."""
        name = "async"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["gs_1_layer"],
                                     storage_names=["part_buffer"],
                                     training_names=["async"],
                                     evaluation_names=["async"],
                                     task="nc")

        run_configs(dataset_dir)
예제 #9
0
    def test_sync_training(self):
        """Sync training variants (degree-based and filtered negatives) for LP."""
        name = "sync_training"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(
            dataset_dir,
            model_names=["distmult"],
            storage_names=["part_buffer"],
            training_names=["sync_deg", "sync_filtered"],
            evaluation_names=["sync"],
            task="lp")

        run_configs(dataset_dir)
예제 #10
0
    def test_async_eval(self):
        """Async evaluation variants over sync-trained LP models."""
        name = "async_eval"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(
            dataset_dir,
            model_names=["distmult", "gs_1_layer"],
            storage_names=["in_memory"],
            training_names=["sync"],
            evaluation_names=["async", "async_deg", "async_filtered"],
            task="lp")

        run_configs(dataset_dir)
예제 #11
0
    def test_gs_uniform(self):
        """GraphSage with uniform neighbor sampling for LP on a partition buffer."""
        name = "basic_gs_uniform"
        dataset_dir = self.output_dir / Path(name)
        # Work on a private copy of the pre-generated graph.
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(
            dataset_dir,
            model_names=["gs_1_layer_uniform", "gs_3_layer_uniform"],
            storage_names=["part_buffer"],
            training_names=["sync"],
            evaluation_names=["sync"],
            task="lp")

        run_configs(dataset_dir)
예제 #12
0
    def test_async(self):
        """Async NC training/evaluation, then a second run resuming from the
        model dir, verifying saved model parameters."""
        name = "async"
        dataset_dir = self.output_dir / Path(name)
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["gs_1_layer"],
                                     storage_names=["part_buffer"],
                                     training_names=["async"],
                                     evaluation_names=["async"],
                                     task="nc")

        run_configs(dataset_dir)

        # Second run resumes from the saved model directory.
        model_dir_path = dataset_dir
        run_configs(dataset_dir, str(model_dir_path))
        ret, err = has_model_params(model_dir_path, "nc")
        assert ret, err  # E712 fix: truthiness check instead of `== True`
예제 #13
0
    def test_emb(self):
        """GraphSage-with-embeddings NC, then a second run resuming from the
        model dir, verifying saved model parameters and embeddings."""
        name = "emb"
        dataset_dir = self.output_dir / Path(name)
        shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)

        generate_configs_for_dataset(
            dataset_dir,
            model_names=["gs_1_layer_emb", "gs_3_layer_emb"],
            storage_names=["in_memory"],
            training_names=["sync"],
            evaluation_names=["sync"],
            task="nc")

        run_configs(dataset_dir)

        # Second run resumes from the saved model directory; embeddings
        # expected because the models use trainable node embeddings.
        model_dir_path = dataset_dir
        run_configs(dataset_dir, str(model_dir_path))
        ret, err = has_model_params(model_dir_path, "nc", True)
        assert ret, err  # E712 fix: truthiness check instead of `== True`
예제 #14
0
    def test_only_train(self):
        """LP training on a dataset generated without explicit splits."""
        name = "only_train"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=10,
                                task="lp")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        run_configs(dataset_dir)
예제 #15
0
    def test_only_train_buffer_no_relations(self):
        """Partition-buffer LP training on a single-relation, train-only dataset."""
        name = "only_train_buffer_no_relations"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=1,
                                num_partitions=8,
                                task="lp")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        run_configs(dataset_dir)
예제 #16
0
    def test_only_train_no_relations(self):
        """In-memory NC training on a single-relation, train-only dataset."""
        name = "only_train_no_relations"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=500,
                                num_edges=10000,
                                num_rels=1,
                                feature_dim=10,
                                task="nc")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["gs_1_layer"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")

        run_configs(dataset_dir)
예제 #17
0
    def test_dm(self):
        """Repeated DistMult runs: each run must produce a model_i directory
        containing saved parameters, including when later model dirs are
        pre-created empty."""
        name = "dm"
        dataset_dir = self.output_dir / Path(name)
        test_graph_dir = self.output_dir / Path("test_graph")
        shutil.copytree(test_graph_dir, dataset_dir)

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        # First two runs should write model_0 then model_1.
        for i in range(2):
            run_configs(dataset_dir)
            model_dir_path = test_graph_dir / Path("model_{}".format(i))
            ret, err = has_model_params(model_dir_path)
            assert ret, err  # E712 fix: truthiness check instead of `== True`

        # Pre-create empty model_2 .. model_10 directories.
        for i in range(2, 11):
            empty_dir = test_graph_dir / Path("model_{}".format(i))
            empty_dir.mkdir(parents=True, exist_ok=True)

        # model_10 exists but must not contain parameters yet.
        model_dir_path = test_graph_dir / Path("model_10")
        ret, err = has_model_params(model_dir_path)
        assert not ret, err

        # After the next run the pre-created model_10 must hold parameters
        # (per the original assertions — TODO confirm the rotation policy).
        run_configs(dataset_dir)
        ret, err = has_model_params(model_dir_path)
        assert ret, err

        # Finally resume from an explicit model dir.
        model_dir_path = dataset_dir
        run_configs(dataset_dir, str(model_dir_path))
        ret, err = has_model_params(model_dir_path)
        assert ret, err
예제 #18
0
    def test_missing_dataset_yaml(self):
        """loadConfig must raise a clear error when dataset.yaml is missing."""

        def check_configs_fail_without_dataset_yaml():
            # Remove dataset.yaml with a portable, error-checked unlink
            # instead of shelling out to `rm` via os.system.
            os.remove(self.output_dir / Path("dataset.yaml"))
            for filename in os.listdir(self.output_dir):
                if filename.startswith("M-"):
                    config_file = self.output_dir / Path(filename)
                    try:
                        loadConfig(str(config_file), save=True)
                    except Exception as e:
                        assert "expected to see dataset.yaml file" in str(e)
                    else:
                        # Raised in the `else` clause so it is not swallowed by
                        # the `except` (the original raised inside the try and
                        # surfaced only as a confusing message mismatch).
                        raise RuntimeError("Exception not thrown")

        generate_configs_for_dataset(self.output_dir,
                                     model_names=["distmult"],
                                     storage_names=["in_memory"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")
        check_configs_fail_without_dataset_yaml()

        # Reset the output dir and repeat for the NC task.
        shutil.rmtree(self.output_dir)
        os.makedirs(self.output_dir)
        OmegaConf.save(self.ds_config, self.output_dir / Path("dataset.yaml"))

        generate_configs_for_dataset(self.output_dir,
                                     model_names=["gs_1_layer"],
                                     storage_names=["part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")
        check_configs_fail_without_dataset_yaml()
예제 #19
0
    def test_no_valid_buffer(self):
        """Partitioned LP evaluation with a two-way split (presumably
        train/test only, i.e. no validation buffer — matches the test name)."""
        name = "no_valid_buffer"
        dataset_dir = self.output_dir / Path(name)

        generate_random_dataset(output_dir=dataset_dir,
                                num_nodes=100,
                                num_edges=1000,
                                num_rels=10,
                                splits=[.9, .1],
                                num_partitions=8,
                                partitioned_eval=True,
                                task="lp")

        generate_configs_for_dataset(dataset_dir,
                                     model_names=["distmult"],
                                     storage_names=["part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")

        run_configs(dataset_dir, partitioned_eval=True)
예제 #20
0
    def test_load_config(self):
        """Every generated config must parse via loadConfig with its members
        accessible, and the saved full_config.yaml must round-trip."""

        def check_generated_configs():
            # Parse each generated "M-" config and sanity-check its members.
            for filename in os.listdir(self.output_dir):
                if not filename.startswith("M-"):
                    continue
                config_file = self.output_dir / Path(filename)

                config = loadConfig(str(config_file), save=True)
                # save=True writes full_config.yaml into the model dir;
                # loading it back must preserve the random seed.
                loaded_full_config = loadConfig(
                    str(config.storage.model_dir / Path("full_config.yaml")))
                assert loaded_full_config.model.random_seed == config.model.random_seed

                assert config.model is not None
                assert config.storage is not None
                assert config.training is not None
                assert config.evaluation is not None

                assert config.model.encoder is not None
                assert config.model.decoder is not None

                # Dataset stats fixed by the shared fixture dataset.
                assert config.storage.dataset.dataset_dir.rstrip("/") == str(self.output_dir)
                assert config.storage.dataset.num_edges == 1000
                assert config.storage.dataset.num_nodes == 100
                assert config.storage.dataset.num_relations == 1
                assert config.storage.dataset.num_train == 100
                assert config.storage.dataset.num_valid == 10
                assert config.storage.dataset.num_test == 10

                # Members must be writable as well as readable.
                config.model.random_seed = 0
                assert config.model.random_seed == 0

        # BUG FIX: model_names/storage_names were single comma-joined strings
        # (e.g. ["distmult, gs_1_layer, ..."]) instead of separate list
        # entries, so only one malformed name was ever generated. Split them
        # to match the (correct) NC invocation below.
        generate_configs_for_dataset(self.output_dir,
                                     model_names=["distmult", "gs_1_layer", "gs_3_layer",
                                                  "gat_1_layer", "gat_3_layer"],
                                     storage_names=["in_memory", "part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="lp")
        check_generated_configs()

        # reset directory
        shutil.rmtree(self.output_dir)
        os.makedirs(self.output_dir)
        OmegaConf.save(self.ds_config, self.output_dir / Path("dataset.yaml"))

        generate_configs_for_dataset(self.output_dir,
                                     model_names=["gs_1_layer", "gs_3_layer",
                                                  "gat_1_layer", "gat_3_layer"],
                                     storage_names=["in_memory", "part_buffer"],
                                     training_names=["sync"],
                                     evaluation_names=["sync"],
                                     task="nc")
        check_generated_configs()