def init_dataset_dir(self, name):
    """Generate a small random link-prediction dataset under base_dir/name and train on it.

    Builds the dataset and a single distmult/in-memory/sync config, then
    trains using the generated config file (prefix "M-") found in the
    dataset directory.  The config path is stored on ``self.config_file``.
    """
    num_nodes = 100
    num_rels = 10
    num_edges = 1000

    # Hoist the repeatedly-built dataset path.
    dataset_dir = Path(self.base_dir) / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=num_nodes,
                            num_edges=num_edges,
                            num_rels=num_rels,
                            splits=[.9, .05, .05],
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["in_memory"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")

    # Generated config files are prefixed with "M-"; remember the last match.
    for filename in os.listdir(dataset_dir):
        if filename.startswith("M-"):
            self.config_file = dataset_dir / Path(filename)

    # str(...) instead of calling __str__() directly (idiom fix).
    config = m.config.loadConfig(str(self.config_file), True)
    m.manager.marius_train(config)
def test_partitioned_eval(self):
    """Node-classification end-to-end test with an 8-partition buffer and partitioned eval.

    Trains four GraphSage variants on a random partitioned dataset, runs
    the configs with partitioned evaluation enabled, and checks the saved
    model parameters afterwards.
    """
    num_nodes = 500
    num_rels = 10
    num_edges = 10000
    name = "partitioned_eval"

    dataset_dir = self.output_dir / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=num_nodes,
                            num_edges=num_edges,
                            num_rels=num_rels,
                            splits=[.9, .05, .05],
                            num_partitions=8,
                            partitioned_eval=True,
                            feature_dim=10,
                            task="nc")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["gs_1_layer_emb", "gs_3_layer_emb",
                                              "gs_1_layer", "gs_3_layer"],
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="nc")

    run_configs(dataset_dir, partitioned_eval=True)

    model_dir_path = dataset_dir
    # NOTE(review): str(model_dir_path) is passed positionally as the second
    # argument here, but sibling call sites use that position for
    # partitioned_eval — confirm run_configs' signature; a path string would
    # silently be truthy if it lands on a boolean parameter.
    run_configs(dataset_dir, str(model_dir_path))

    ret, err = has_model_params(model_dir_path, "nc", True)
    # Idiomatic truthiness check instead of "== True".
    assert ret, err
def test_sequential(self):
    """Node-classification run exercising sequential train-node ordering.

    Generates an 8-partition random dataset with sequential train nodes,
    then runs all generated configs twice: once with partitioned eval on
    and once with it off, both with sequential_train_nodes enabled.
    """
    name = "sequential_ordering"
    dataset_dir = self.output_dir / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=500,
                            num_edges=10000,
                            num_rels=1,
                            splits=[.1, .05, .05],
                            num_partitions=8,
                            partitioned_eval=True,
                            sequential_train_nodes=True,
                            feature_dim=10,
                            task="nc")

    gs_models = ["gs_1_layer_emb", "gs_3_layer_emb", "gs_1_layer", "gs_3_layer"]
    generate_configs_for_dataset(dataset_dir,
                                 model_names=gs_models,
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="nc")

    # Exercise both evaluation modes under sequential ordering.
    for part_eval in (True, False):
        run_configs(dataset_dir,
                    partitioned_eval=part_eval,
                    sequential_train_nodes=True)
def test_partitioned_eval(self):
    """Link-prediction end-to-end test with an 8-partition buffer and partitioned eval.

    Trains distmult with sync and several async evaluation variants, then
    checks the saved model parameters.

    NOTE(review): this method has the same name as another
    test_partitioned_eval in this file — if both are defined in the same
    TestCase class, the later definition silently shadows the earlier one
    and only one actually runs; confirm they belong to different classes.
    """
    num_nodes = 100
    num_rels = 1
    num_edges = 1000
    name = "partitioned_eval"

    dataset_dir = self.output_dir / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=num_nodes,
                            num_edges=num_edges,
                            num_rels=num_rels,
                            splits=[.9, .05, .05],
                            num_partitions=8,
                            partitioned_eval=True,
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync", "async",
                                                   "async_deg", "async_filtered"],
                                 task="lp")

    run_configs(dataset_dir, partitioned_eval=True)

    model_dir_path = dataset_dir
    # NOTE(review): str(model_dir_path) is passed positionally where sibling
    # calls pass partitioned_eval — confirm run_configs' signature; a path
    # string landing on a boolean parameter is always truthy.
    run_configs(dataset_dir, str(model_dir_path))

    ret, err = has_model_params(model_dir_path, "lp", False)
    # Idiomatic truthiness check instead of "== True".
    assert ret, err
def setUp(self):
    """Create TMP_TEST_DIR, generate a small LP dataset, and pre-train a model.

    Generates a distmult/in-memory/sync config for a random 100-node
    link-prediction graph, remembers the generated config file (prefix
    "M-") on ``self.config_file``, and trains once so later tests have a
    trained model to work with.
    """
    if not Path(TMP_TEST_DIR).exists():
        Path(TMP_TEST_DIR).mkdir()

    base_dir = TMP_TEST_DIR
    num_nodes = 100
    num_rels = 10
    num_edges = 1000
    name = "basic_lp"

    dataset_dir = base_dir / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=num_nodes,
                            num_edges=num_edges,
                            num_rels=num_rels,
                            splits=[.9, .05, .05],
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["in_memory"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")

    # Generated config files are prefixed with "M-"; remember the last match.
    for filename in os.listdir(dataset_dir):
        if filename.startswith("M-"):
            self.config_file = dataset_dir / Path(filename)

    # str(...) instead of calling __str__() directly (idiom fix).
    config = m.config.loadConfig(str(self.config_file), True)
    m.manager.marius_train(config)
def setUp(self):
    """Ensure the output directory exists and generate a small random LP graph.

    Creates a 100-node, 1000-edge link-prediction dataset named
    "test_graph" with a 90/5/5 train/valid/test split for the tests in
    this class to consume.
    """
    if not self.output_dir.exists():
        os.makedirs(self.output_dir)

    graph_name = "test_graph"
    generate_random_dataset(output_dir=self.output_dir / Path(graph_name),
                            num_nodes=100,
                            num_edges=1000,
                            num_rels=10,
                            splits=[.9, .05, .05],
                            task="lp")
def setUp(self):
    """Ensure the output directory exists and generate a partitioned NC graph.

    Creates a 500-node, 10000-edge node-classification dataset named
    "test_graph" with 8 partitions and 10-dimensional features for the
    tests in this class to consume.
    """
    if not self.output_dir.exists():
        os.makedirs(self.output_dir)

    graph_name = "test_graph"
    generate_random_dataset(output_dir=self.output_dir / Path(graph_name),
                            num_nodes=500,
                            num_edges=10000,
                            num_rels=1,
                            splits=[.9, .05, .05],
                            num_partitions=8,
                            feature_dim=10,
                            task="nc")
def test_only_train(self):
    """Link-prediction training smoke test with no explicit splits.

    Generates a random graph without train/valid/test splits (train-only)
    and runs a distmult/in-memory/sync config over it.
    """
    name = "only_train"
    dataset_dir = self.output_dir / Path(name)

    # No splits argument: the whole edge set is used for training.
    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=100,
                            num_edges=1000,
                            num_rels=10,
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["in_memory"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")

    run_configs(dataset_dir)
def test_only_train_buffer_no_relations(self):
    """Train-only LP smoke test with a partition buffer and a single relation type.

    Generates an 8-partition random graph with one relation and runs a
    distmult/part_buffer/sync config over it.
    """
    name = "only_train_buffer_no_relations"
    dataset_dir = self.output_dir / Path(name)

    # Single relation + partitioned storage; no splits (train-only).
    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=100,
                            num_edges=1000,
                            num_rels=1,
                            num_partitions=8,
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")

    run_configs(dataset_dir)
def test_only_train_no_relations(self):
    """Train-only NC smoke test with a single relation type.

    Generates a random node-classification graph with one relation and
    10-dim features, then runs a gs_1_layer/in_memory/sync config over it.
    """
    name = "only_train_no_relations"
    dataset_dir = self.output_dir / Path(name)

    # Single relation, in-memory storage; no splits (train-only).
    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=500,
                            num_edges=10000,
                            num_rels=1,
                            feature_dim=10,
                            task="nc")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["gs_1_layer"],
                                 storage_names=["in_memory"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="nc")

    run_configs(dataset_dir)
def test_no_valid_buffer(self):
    """Partition-buffer LP run with no validation split.

    Generates an 8-partition random graph split only into train/test
    ([.9, .1] — intentionally no validation set, per the test name) and
    runs a distmult/part_buffer/sync config with partitioned evaluation.
    """
    name = "no_valid_buffer"
    dataset_dir = self.output_dir / Path(name)

    generate_random_dataset(output_dir=dataset_dir,
                            num_nodes=100,
                            num_edges=1000,
                            num_rels=10,
                            splits=[.9, .1],
                            num_partitions=8,
                            partitioned_eval=True,
                            task="lp")

    generate_configs_for_dataset(dataset_dir,
                                 model_names=["distmult"],
                                 storage_names=["part_buffer"],
                                 training_names=["sync"],
                                 evaluation_names=["sync"],
                                 task="lp")

    run_configs(dataset_dir, partitioned_eval=True)