def test_sequential(self):
    """Node classification on a partitioned dataset whose train nodes are
    laid out sequentially; runs both partitioned and unpartitioned eval."""
    name = "sequential_ordering"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=500,
        num_edges=10000,
        num_rels=1,
        splits=[0.1, 0.05, 0.05],
        num_partitions=8,
        partitioned_eval=True,
        sequential_train_nodes=True,
        feature_dim=10,
        task="nc",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer_emb", "gs_3_layer_emb", "gs_1_layer", "gs_3_layer"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="nc",
    )
    # Exercise both evaluation paths against the same generated configs.
    run_configs(dataset_dir, partitioned_eval=True, sequential_train_nodes=True)
    run_configs(dataset_dir, partitioned_eval=False, sequential_train_nodes=True)
def init_dataset_dir(self, name):
    """Generate a small random LP dataset under base_dir/name, create distmult
    configs for it, and train each generated config once."""
    dataset_dir = Path(self.base_dir) / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=10,
        splits=[0.9, 0.05, 0.05],
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    for filename in os.listdir(dataset_dir):
        if not filename.startswith("M-"):
            continue
        self.config_file = dataset_dir / Path(filename)
        config = m.config.loadConfig(str(self.config_file), True)
        m.manager.marius_train(config)
def setUp(self):
    """Generate, configure, and train a basic link-prediction dataset that the
    tests in this class operate on."""
    if not Path(TMP_TEST_DIR).exists():
        Path(TMP_TEST_DIR).mkdir()
    name = "basic_lp"
    dataset_dir = Path(TMP_TEST_DIR) / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=10,
        splits=[0.9, 0.05, 0.05],
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    for filename in os.listdir(dataset_dir):
        if not filename.startswith("M-"):
            continue
        self.config_file = dataset_dir / Path(filename)
        config = m.config.loadConfig(str(self.config_file), True)
        m.manager.marius_train(config)
def test_partitioned_eval(self):
    """Link prediction on a partitioned dataset: run all eval variants with
    partitioned evaluation, then re-run with an explicit model dir and check
    that model parameters were written."""
    name = "partitioned_eval"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=1,
        splits=[.9, .05, .05],
        num_partitions=8,
        partitioned_eval=True,
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync", "async", "async_deg", "async_filtered"],
        task="lp",
    )
    run_configs(dataset_dir, partitioned_eval=True)

    # Second run supplies the model directory explicitly, then verifies params.
    model_dir_path = dataset_dir
    run_configs(dataset_dir, str(model_dir_path))
    ret, err = has_model_params(model_dir_path, "lp", False)
    assert ret, err  # fixed: `ret == True` comparison replaced with truthiness (E712)
def test_partitioned_eval(self):
    """Node classification on a partitioned dataset with partitioned eval,
    then a second run with an explicit model dir plus a params check."""
    name = "partitioned_eval"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=500,
        num_edges=10000,
        num_rels=10,
        splits=[.9, .05, .05],
        num_partitions=8,
        partitioned_eval=True,
        feature_dim=10,
        task="nc",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer_emb", "gs_3_layer_emb", "gs_1_layer", "gs_3_layer"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="nc",
    )
    run_configs(dataset_dir, partitioned_eval=True)

    # Second run supplies the model directory explicitly, then verifies params.
    model_dir_path = dataset_dir
    run_configs(dataset_dir, str(model_dir_path))
    ret, err = has_model_params(model_dir_path, "nc", True)
    assert ret, err  # fixed: `ret == True` comparison replaced with truthiness (E712)
def test_dm(self):
    """DistMult link prediction on a copy of the shared in-memory test graph."""
    name = "dm"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_gs(self):
    """GraphSage node classification (1- and 3-layer) on the shared test graph."""
    name = "gs"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer", "gs_3_layer"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="nc",
    )
    run_configs(dataset_dir)
def test_async(self):
    """Async training + async evaluation with the partition buffer backend."""
    name = "async"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer"],
        storage_names=["part_buffer"],
        training_names=["async"],
        evaluation_names=["async"],
        task="nc",
    )
    run_configs(dataset_dir)
def test_sync_training(self):
    """Synchronous training variants (degree-based and filtered negatives)
    for distmult on the partition buffer backend."""
    name = "sync_training"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["part_buffer"],
        training_names=["sync_deg", "sync_filtered"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_async_eval(self):
    """Sync training followed by every asynchronous evaluation variant."""
    name = "async_eval"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult", "gs_1_layer"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["async", "async_deg", "async_filtered"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_gs_uniform(self):
    """GraphSage with uniform neighbor sampling on the partition buffer backend."""
    name = "basic_gs_uniform"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer_uniform", "gs_3_layer_uniform"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_async(self):
    """Async training/eval, then a re-run with an explicit model dir and a
    check that model parameters were saved."""
    name = "async"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer"],
        storage_names=["part_buffer"],
        training_names=["async"],
        evaluation_names=["async"],
        task="nc",
    )
    run_configs(dataset_dir)

    # Second run supplies the model directory explicitly, then verifies params.
    model_dir_path = dataset_dir
    run_configs(dataset_dir, str(model_dir_path))
    ret, err = has_model_params(model_dir_path, "nc")
    assert ret, err  # fixed: `ret == True` comparison replaced with truthiness (E712)
def test_emb(self):
    """GraphSage with learned embeddings, then a re-run with an explicit model
    dir and a check that model parameters (including embeddings) were saved."""
    name = "emb"
    dataset_dir = self.output_dir / Path(name)
    shutil.copytree(self.output_dir / Path("test_graph"), dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer_emb", "gs_3_layer_emb"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="nc",
    )
    run_configs(dataset_dir)

    # Second run supplies the model directory explicitly, then verifies params.
    model_dir_path = dataset_dir
    run_configs(dataset_dir, str(model_dir_path))
    ret, err = has_model_params(model_dir_path, "nc", True)
    assert ret, err  # fixed: `ret == True` comparison replaced with truthiness (E712)
def test_only_train(self):
    """LP training on a dataset generated with no explicit splits (train only)."""
    name = "only_train"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=10,
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_only_train_buffer_no_relations(self):
    """Train-only LP on a single-relation partitioned dataset using the buffer."""
    name = "only_train_buffer_no_relations"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=1,
        num_partitions=8,
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir)
def test_only_train_no_relations(self):
    """Train-only NC on a single-relation dataset with node features."""
    name = "only_train_no_relations"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=500,
        num_edges=10000,
        num_rels=1,
        feature_dim=10,
        task="nc",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["gs_1_layer"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="nc",
    )
    run_configs(dataset_dir)
def test_dm(self):
    """Verify that repeated runs write model params to successive model_i
    directories (model_0, model_1, rollover to a pre-created model_10) and
    that an explicitly supplied model dir also receives params."""
    name = "dm"
    dataset_dir = self.output_dir / Path(name)
    graph_dir = self.output_dir / Path("test_graph")
    shutil.copytree(graph_dir, dataset_dir)
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["in_memory"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )

    # NOTE(review): params are asserted under test_graph/ while the configs run
    # against the copied dataset dir -- confirm run_configs writes model dirs
    # there and not under the copy.
    run_configs(dataset_dir)
    ret, err = has_model_params(graph_dir / Path("model_0"))
    assert ret, err  # fixed: `== True` comparison replaced with truthiness (E712)

    run_configs(dataset_dir)
    ret, err = has_model_params(graph_dir / Path("model_1"))
    assert ret, err

    # Pre-create model_2..model_10 so the next run rolls over to model_10.
    for i in range(2, 11):
        model_dir_path = graph_dir / Path("model_{}".format(i))
        model_dir_path.mkdir(parents=True, exist_ok=True)

    model_dir_path = graph_dir / Path("model_10")
    ret, err = has_model_params(model_dir_path)
    assert not ret, err  # fixed: `== False`; an empty dir must not report params

    run_configs(dataset_dir)
    ret, err = has_model_params(model_dir_path)
    assert ret, err

    # Explicit model directory argument.
    model_dir_path = dataset_dir
    run_configs(dataset_dir, str(model_dir_path))
    ret, err = has_model_params(model_dir_path)
    assert ret, err
def test_missing_dataset_yaml(self):
    """loadConfig must raise a clear error when dataset.yaml is missing, for
    both a link-prediction and a node-classification config set."""

    def check_all_configs_fail():
        # Every generated M-* config must fail to load with the dataset.yaml error.
        for filename in os.listdir(self.output_dir):
            if not filename.startswith("M-"):
                continue
            config_file = self.output_dir / Path(filename)
            try:
                loadConfig(config_file.__str__(), save=True)
            except Exception as e:
                assert "expected to see dataset.yaml file" in e.__str__()
            else:
                # fixed: the original raised this inside the try, so it was
                # swallowed by the except and surfaced as a misleading
                # assertion failure; try/except/else reports it directly.
                raise RuntimeError("Exception not thrown")

    generate_configs_for_dataset(self.output_dir,
                                 model_names=["distmult"],
                                 storage_names=["in_memory"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")
    # fixed: os.remove instead of os.system("rm ...") -- portable and raises on failure.
    os.remove(self.output_dir / Path("dataset.yaml"))
    check_all_configs_fail()

    # Reset the directory and repeat for node classification.
    shutil.rmtree(self.output_dir)
    os.makedirs(self.output_dir)
    OmegaConf.save(self.ds_config, self.output_dir / Path("dataset.yaml"))
    generate_configs_for_dataset(self.output_dir,
                                 model_names=["gs_1_layer"],
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="nc")
    os.remove(self.output_dir / Path("dataset.yaml"))
    check_all_configs_fail()
def test_no_valid_buffer(self):
    """Partitioned LP training when the dataset has no validation split
    (splits are train/test only)."""
    name = "no_valid_buffer"
    dataset_dir = self.output_dir / Path(name)
    generate_random_dataset(
        output_dir=dataset_dir,
        num_nodes=100,
        num_edges=1000,
        num_rels=10,
        splits=[0.9, 0.1],
        num_partitions=8,
        partitioned_eval=True,
        task="lp",
    )
    generate_configs_for_dataset(
        dataset_dir,
        model_names=["distmult"],
        storage_names=["part_buffer"],
        training_names=["sync"],
        evaluation_names=["sync"],
        task="lp",
    )
    run_configs(dataset_dir, partitioned_eval=True)
def test_load_config(self):
    """Generated configs must parse, round-trip through the saved
    full_config.yaml, and expose the expected dataset statistics."""

    def check_generated_configs():
        # Check that each generated config can be parsed and its members accessed.
        for filename in os.listdir(self.output_dir):
            if not filename.startswith("M-"):
                continue
            config_file = self.output_dir / Path(filename)
            config = loadConfig(config_file.__str__(), save=True)

            # The saved full config must round-trip with the same random seed.
            loaded_full_config = loadConfig(
                (config.storage.model_dir / Path("full_config.yaml")).__str__())
            assert loaded_full_config.model.random_seed == config.model.random_seed

            assert config.model is not None
            assert config.storage is not None
            assert config.training is not None
            assert config.evaluation is not None
            assert config.model.encoder is not None
            assert config.model.decoder is not None
            assert config.storage.dataset.dataset_dir.rstrip("/") == self.output_dir.__str__()
            assert config.storage.dataset.num_edges == 1000
            assert config.storage.dataset.num_nodes == 100
            assert config.storage.dataset.num_relations == 1
            assert config.storage.dataset.num_train == 100
            assert config.storage.dataset.num_valid == 10
            assert config.storage.dataset.num_test == 10

            # Loaded configs are mutable.
            config.model.random_seed = 0
            assert config.model.random_seed == 0

    # fixed: model_names/storage_names were single comma-joined strings
    # ("distmult, gs_1_layer, ...") instead of lists of individual names, so
    # only one bogus model/storage combination was ever generated. The nc call
    # below already used proper lists.
    generate_configs_for_dataset(self.output_dir,
                                 model_names=["distmult", "gs_1_layer", "gs_3_layer",
                                              "gat_1_layer", "gat_3_layer"],
                                 storage_names=["in_memory", "part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")
    check_generated_configs()

    # Reset the directory and repeat for node classification.
    shutil.rmtree(self.output_dir)
    os.makedirs(self.output_dir)
    OmegaConf.save(self.ds_config, self.output_dir / Path("dataset.yaml"))
    generate_configs_for_dataset(self.output_dir,
                                 model_names=["gs_1_layer", "gs_3_layer",
                                              "gat_1_layer", "gat_3_layer"],
                                 storage_names=["in_memory", "part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="nc")
    check_generated_configs()