Example 1
def test_random_link_split_on_undirected_hetero_data():
    """An undirected 'p'-'p' relation stays undirected in the train split,
    both without and with an explicit reverse edge type."""
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)

    # `rev_edge_types=None` matches the transform's default:
    for rev_edge_types in (None, ('p', 'p')):
        transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                                    rev_edge_types=rev_edge_types)
        train_data, val_data, test_data = transform(data)
        assert train_data['p', 'p'].is_undirected()
Example 2
def test_gae():
    """End-to-end smoke test for the (non-variational) graph auto-encoder."""
    # Identity encoder: latent codes equal the input features.
    model = GAE(encoder=lambda x: x)
    model.reset_parameters()

    features = torch.Tensor([[1, -1], [1, 2], [2, 1]])
    latent = model.encode(features)
    assert latent.tolist() == features.tolist()

    # Dense decoding is the sigmoid of the pairwise inner products Z @ Z^T:
    gram = torch.Tensor([[+2, -1, +1], [-1, +5, +4], [+1, +4, +5]])
    dense = model.decoder.forward_all(latent)
    assert dense.tolist() == torch.sigmoid(gram).tolist()

    # Sparse decoding on two selected node pairs:
    pairs = torch.tensor([[0, 1], [1, 2]])
    scores = model.decode(latent, pairs)
    assert scores.tolist() == torch.sigmoid(torch.Tensor([-1, 4])).tolist()

    # Build a path graph on 11 nodes and split its links:
    src, dst = list(range(0, 10)), list(range(1, 11))
    graph = Data(edge_index=torch.tensor([src, dst]), num_nodes=11)
    splitter = RandomLinkSplit(split_labels=True,
                               add_negative_train_samples=False)
    train_data, val_data, test_data = splitter(graph)

    latent = torch.randn(11, 16)
    loss = model.recon_loss(latent, train_data.pos_edge_label_index)
    assert loss.item() > 0

    auc, ap = model.test(latent, val_data.pos_edge_label_index,
                         val_data.neg_edge_label_index)
    assert 0 <= auc <= 1 and 0 <= ap <= 1
Example 3
    def process(self):
        """Split the dataset's links, extract enclosing subgraphs per split,
        attach one-hot node-label features, and save the collated results."""
        transform = RandomLinkSplit(num_val=0.05,
                                    num_test=0.1,
                                    is_undirected=True,
                                    split_labels=True)
        splits = transform(self.data)  # (train, val, test)

        self._max_z = 0

        # For each split, collect positive (label 1) and negative (label 0)
        # enclosing subgraphs in order: train+, train-, val+, val-, test+, test-.
        data_lists = []
        for split in splits:
            data_lists.append(self.extract_enclosing_subgraphs(
                split.edge_index, split.pos_edge_label_index, 1))
            data_lists.append(self.extract_enclosing_subgraphs(
                split.edge_index, split.neg_edge_label_index, 0))

        # Convert node labeling to one-hot features.
        # We solely learn links from structure, dropping any node features:
        for data in chain.from_iterable(data_lists):
            data.x = F.one_hot(data.z, self._max_z + 1).to(torch.float)

        # Persist one collated file per split (pos + neg subgraphs together):
        pos_lists, neg_lists = data_lists[0::2], data_lists[1::2]
        for path, pos_list, neg_list in zip(self.processed_paths, pos_lists,
                                            neg_lists):
            torch.save(self.collate(pos_list + neg_list), path)
Example 4
def test_random_link_split_on_hetero_data():
    """RandomLinkSplit on a heterogeneous graph: single undirected relation,
    directed relation with a reverse type, and both at once."""
    data = HeteroData()

    data['p'].x = torch.arange(100)
    data['a'].x = torch.arange(100, 300)

    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)
    data['p', 'p'].edge_attr = torch.arange(data['p', 'p'].num_edges)
    data['p', 'a'].edge_index = get_edge_index(100, 200, 1000)
    data['p', 'a'].edge_attr = torch.arange(500, 1500)
    data['a', 'p'].edge_index = data['p', 'a'].edge_index.flip([0])
    data['a', 'p'].edge_attr = torch.arange(1500, 2500)

    # Split only the undirected 'p'-'p' relation:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=True,
                                edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 4
    assert len(train_data['p', 'a']) == 2
    assert len(train_data['a', 'p']) == 2

    # Every split keeps the relation undirected (edges and attributes):
    for split in (train_data, val_data, test_data):
        assert is_undirected(split['p', 'p'].edge_index,
                             split['p', 'p'].edge_attr)

    # Split the directed 'p'->'a' relation, mirroring it onto 'a'->'p':
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2,
                                edge_types=('p', 'a'),
                                rev_edge_types=('a', 'p'))
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 2
    assert len(train_data['p', 'a']) == 4
    assert len(train_data['a', 'p']) == 2

    # (split, #message-passing edges, #supervision edges) per split:
    expected = ((train_data, 600, 1200), (val_data, 600, 400),
                (test_data, 800, 400))
    for split, num_edges, num_labels in expected:
        for rel in (('p', 'a'), ('a', 'p')):
            store = split[rel]
            assert store.edge_index.size() == (2, num_edges)
            assert store.edge_attr.size() == (num_edges, )
            assert store.edge_attr.min() >= 500
            assert store.edge_attr.max() <= 1500
        assert split['p', 'a'].edge_label_index.size() == (2, num_labels)
        assert split['p', 'a'].edge_label.size() == (num_labels, )

    # Split both relations in one transform:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=True,
                                edge_types=[('p', 'p'), ('p', 'a')],
                                rev_edge_types=[None, ('a', 'p')])
    train_data, val_data, test_data = transform(data)

    assert len(train_data['p']) == 1
    assert len(train_data['a']) == 1
    assert len(train_data['p', 'p']) == 4
    assert len(train_data['p', 'a']) == 4
    assert len(train_data['a', 'p']) == 2

    assert is_undirected(train_data['p', 'p'].edge_index,
                         train_data['p', 'p'].edge_attr)
    assert train_data['p', 'a'].edge_index.size() == (2, 600)
    assert train_data['a', 'p'].edge_index.size() == (2, 600)
Example 5
def test_random_link_split():
    """RandomLinkSplit on homogeneous data: absolute counts, percentage
    split with negative sampling, and a disjoint training split."""
    assert str(RandomLinkSplit()) == ('RandomLinkSplit('
                                      'num_val=0.1, num_test=0.2)')

    edge_index = torch.tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5],
                               [1, 0, 2, 1, 3, 2, 4, 3, 5, 4]])
    edge_attr = torch.randn(edge_index.size(1), 3)

    data = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=100)

    def check(split, num_edges, num_labels):
        # Each split keeps all five attributes and the full node set, with
        # `num_edges` message-passing edges and `num_labels` label edges:
        assert len(split) == 5
        assert split.num_nodes == 100
        assert split.edge_index.size() == (2, num_edges)
        assert split.edge_attr.size() == (num_edges, 3)
        assert split.edge_label_index.size(1) == num_labels
        assert split.edge_label.size(0) == num_labels

    # No test split (absolute `num_val`):
    transform = RandomLinkSplit(num_val=2, num_test=0, is_undirected=True)
    train_data, val_data, test_data = transform(data)
    check(train_data, 6, 6)
    check(val_data, 6, 4)
    check(test_data, 10, 0)

    # Percentage split with a 2x negative sampling ratio:
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2,
                                neg_sampling_ratio=2.0, is_undirected=False)
    train_data, val_data, test_data = transform(data)
    check(train_data, 6, 18)
    check(val_data, 6, 6)
    check(test_data, 8, 6)

    # Disjoint training split (half the train edges become labels only):
    transform = RandomLinkSplit(num_val=0.2, num_test=0.2, is_undirected=False,
                                disjoint_train_ratio=0.5)
    train_data, val_data, test_data = transform(data)
    check(train_data, 3, 6)
Example 6
    dst_mapping=movie_mapping,
    encoders={'rating': IdentityEncoder(dtype=torch.long)},
)

# Assemble the heterogeneous graph from the loaded node/edge tensors.
data = HeteroData()
data['user'].num_nodes = len(user_mapping)  # Users do not have any features.
data['movie'].x = movie_x
data['user', 'rates', 'movie'].edge_index = edge_index
data['user', 'rates', 'movie'].edge_label = edge_label
print(data)

# We can now convert `data` into an appropriate format for training a
# graph-based machine learning model:

# 1. Add a reverse ('movie', 'rev_rates', 'user') relation for message passing.
data = ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# 2. Perform a link-level split into training, validation, and test edges.
# `neg_sampling_ratio=0.0` requests no negative examples, and
# `rev_edge_types` ties the reverse relation to the forward split.
transform = RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)
train_data, val_data, test_data = transform(data)
print(train_data)
print(val_data)
print(test_data)