def test_random_link_split_on_undirected_hetero_data():
    """An undirected ('p', 'p') relation must stay undirected after splitting."""
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)

    # Without an explicit reverse edge type:
    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'))
    train_data, _, _ = transform(data)
    assert train_data['p', 'p'].is_undirected()

    # With the relation acting as its own reverse edge type:
    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                                rev_edge_types=('p', 'p'))
    train_data, _, _ = transform(data)
    assert train_data['p', 'p'].is_undirected()
def test_gae():
    """Smoke-test GAE encoding, edge decoding, reconstruction loss and metrics."""
    model = GAE(encoder=lambda x: x)  # identity encoder
    model.reset_parameters()

    x = torch.Tensor([[1, -1], [1, 2], [2, 1]])
    z = model.encode(x)
    assert z.tolist() == x.tolist()

    # Dense decoding over all node pairs; expected values equal
    # sigmoid(z @ z.T) for the latent matrix above:
    adj = model.decoder.forward_all(z)
    expected = torch.sigmoid(
        torch.Tensor([[+2, -1, +1], [-1, +5, +4], [+1, +4, +5]]))
    assert adj.tolist() == expected.tolist()

    # Sparse decoding restricted to the given edges:
    edge_index = torch.tensor([[0, 1], [1, 2]])
    value = model.decode(z, edge_index)
    assert value.tolist() == torch.sigmoid(torch.Tensor([-1, 4])).tolist()

    # Reconstruction loss / AUC+AP evaluation on a path graph of 11 nodes:
    edge_index = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    data = Data(edge_index=edge_index, num_nodes=11)
    transform = RandomLinkSplit(split_labels=True,
                                add_negative_train_samples=False)
    train_data, val_data, test_data = transform(data)

    z = torch.randn(11, 16)
    loss = model.recon_loss(z, train_data.pos_edge_label_index)
    assert loss.item() > 0

    auc, ap = model.test(z, val_data.pos_edge_label_index,
                         val_data.neg_edge_label_index)
    assert 0 <= auc <= 1 and 0 <= ap <= 1
def process(self):
    """Split edges into train/val/test, extract enclosing subgraphs for each
    positive and negative link, one-hot encode node labels, and save the
    collated lists to ``self.processed_paths``.

    NOTE(review): assumes ``extract_enclosing_subgraphs`` updates
    ``self._max_z`` to the largest node label seen — confirm in its definition.
    """
    transform = RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                                split_labels=True)
    splits = transform(self.data)  # (train_data, val_data, test_data)

    self._max_z = 0

    # One combined subgraph list (positives followed by negatives) per split:
    data_lists = []
    for split in splits:
        pos_list = self.extract_enclosing_subgraphs(
            split.edge_index, split.pos_edge_label_index, 1)
        neg_list = self.extract_enclosing_subgraphs(
            split.edge_index, split.neg_edge_label_index, 0)
        data_lists.append(pos_list + neg_list)

    # Convert node labeling to one-hot features, only after all splits were
    # extracted so that `self._max_z` is final.
    # We solely learn links from structure, dropping any node features:
    for data in chain.from_iterable(data_lists):
        data.x = F.one_hot(data.z, self._max_z + 1).to(torch.float)

    # processed_paths[0..2] correspond to the train/val/test splits in order:
    for data_list, path in zip(data_lists, self.processed_paths):
        torch.save(self.collate(data_list), path)
def test_random_link_split_on_hetero_data():
    """RandomLinkSplit on HeteroData: undirected, reverse-typed and mixed splits."""
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['a'].x = torch.arange(100, 300)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)
    data['p', 'p'].edge_attr = torch.arange(data['p', 'p'].num_edges)
    data['p', 'a'].edge_index = get_edge_index(100, 200, 1000)
    data['p', 'a'].edge_attr = torch.arange(500, 1500)
    data['a', 'p'].edge_index = data['p', 'a'].edge_index.flip([0])
    data['a', 'p'].edge_attr = torch.arange(1500, 2500)

    # Case 1: undirected split restricted to the homogeneous ('p', 'p') type:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=True,
                                edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 4
    assert len(train_data['p', 'a']) == 2
    assert len(train_data['a', 'p']) == 2

    for split in (train_data, val_data, test_data):
        assert is_undirected(split['p', 'p'].edge_index,
                             split['p', 'p'].edge_attr)

    # Case 2: bipartite ('p', 'a') split with its reverse type ('a', 'p'):
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2,
                                edge_types=('p', 'a'),
                                rev_edge_types=('a', 'p'))
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 2
    assert len(train_data['p', 'a']) == 4
    assert len(train_data['a', 'p']) == 2

    for split, num_edges, num_labels in [(train_data, 600, 1200),
                                         (val_data, 600, 400),
                                         (test_data, 800, 400)]:
        for edge_type in (('p', 'a'), ('a', 'p')):
            assert split[edge_type].edge_index.size() == (2, num_edges)
            assert split[edge_type].edge_attr.size() == (num_edges, )
            assert split[edge_type].edge_attr.min() >= 500
            assert split[edge_type].edge_attr.max() <= 1500
        # Labels only live on the forward ('p', 'a') relation:
        assert split['p', 'a'].edge_label_index.size() == (2, num_labels)
        assert split['p', 'a'].edge_label.size() == (num_labels, )

    # Case 3: multiple edge types at once, with a per-type reverse mapping:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=True,
                                edge_types=[('p', 'p'), ('p', 'a')],
                                rev_edge_types=[None, ('a', 'p')])
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 4
    assert len(train_data['p', 'a']) == 4
    assert len(train_data['a', 'p']) == 2

    assert is_undirected(train_data['p', 'p'].edge_index,
                         train_data['p', 'p'].edge_attr)
    assert train_data['p', 'a'].edge_index.size() == (2, 600)
    assert train_data['a', 'p'].edge_index.size() == (2, 600)
def test_random_link_split():
    """RandomLinkSplit on homogeneous Data: repr, count/ratio splits, disjoint."""
    assert str(RandomLinkSplit()) == ('RandomLinkSplit('
                                      'num_val=0.1, num_test=0.2)')

    edge_index = torch.tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5],
                               [1, 0, 2, 1, 3, 2, 4, 3, 5, 4]])
    edge_attr = torch.randn(edge_index.size(1), 3)
    data = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=100)

    # No test split (absolute count for validation, zero test edges):
    transform = RandomLinkSplit(num_val=2, num_test=0, is_undirected=True)
    train_data, val_data, test_data = transform(data)

    assert len(train_data) == 5
    assert train_data.num_nodes == 100
    assert train_data.edge_index.size() == (2, 6)
    assert train_data.edge_attr.size() == (6, 3)
    assert train_data.edge_label_index.size(1) == 6
    assert train_data.edge_label.size(0) == 6

    assert len(val_data) == 5
    assert val_data.num_nodes == 100
    assert val_data.edge_index.size() == (2, 6)
    assert val_data.edge_attr.size() == (6, 3)
    assert val_data.edge_label_index.size(1) == 4
    assert val_data.edge_label.size(0) == 4

    assert len(test_data) == 5
    assert test_data.num_nodes == 100
    assert test_data.edge_index.size() == (2, 10)
    assert test_data.edge_attr.size() == (10, 3)
    assert test_data.edge_label_index.size() == (2, 0)
    assert test_data.edge_label.size() == (0, )

    # Percentage split with a 2x negative sampling ratio:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2,
                                neg_sampling_ratio=2.0, is_undirected=False)
    train_data, val_data, test_data = transform(data)

    assert len(train_data) == 5
    assert train_data.num_nodes == 100
    assert train_data.edge_index.size() == (2, 6)
    assert train_data.edge_attr.size() == (6, 3)
    assert train_data.edge_label_index.size(1) == 18
    assert train_data.edge_label.size(0) == 18

    assert len(val_data) == 5
    assert val_data.num_nodes == 100
    assert val_data.edge_index.size() == (2, 6)
    assert val_data.edge_attr.size() == (6, 3)
    assert val_data.edge_label_index.size(1) == 6
    assert val_data.edge_label.size(0) == 6

    assert len(test_data) == 5
    assert test_data.num_nodes == 100
    assert test_data.edge_index.size() == (2, 8)
    assert test_data.edge_attr.size() == (8, 3)
    assert test_data.edge_label_index.size(1) == 6
    assert test_data.edge_label.size(0) == 6

    # Disjoint training split: half of the training edges are held out as
    # supervision labels only, so `edge_index` shrinks from 6 to 3 edges
    # while the label tensors keep all 6 entries:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=False,
                                disjoint_train_ratio=0.5)
    train_data, val_data, test_data = transform(data)

    assert len(train_data) == 5
    assert train_data.num_nodes == 100
    assert train_data.edge_index.size() == (2, 3)
    assert train_data.edge_attr.size() == (3, 3)
    assert train_data.edge_label_index.size(1) == 6
    assert train_data.edge_label.size(0) == 6
dst_mapping=movie_mapping, encoders={'rating': IdentityEncoder(dtype=torch.long)}, ) data = HeteroData() data['user'].num_nodes = len(user_mapping) # Users do not have any features. data['movie'].x = movie_x data['user', 'rates', 'movie'].edge_index = edge_index data['user', 'rates', 'movie'].edge_label = edge_label print(data) # We can now convert `data` into an appropriate format for training a # graph-based machine learning model: # 1. Add a reverse ('movie', 'rev_rates', 'user') relation for message passing. data = ToUndirected()(data) del data['movie', 'rev_rates', 'user'].edge_label # Remove "reverse" label. # 2. Perform a link-level split into training, validation, and test edges. transform = RandomLinkSplit( num_val=0.05, num_test=0.1, neg_sampling_ratio=0.0, edge_types=[('user', 'rates', 'movie')], rev_edge_types=[('movie', 'rev_rates', 'user')], ) train_data, val_data, test_data = transform(data) print(train_data) print(val_data) print(test_data)