Example #1
    def __call__(self, data: HeteroData) -> HeteroData:
        edge_types = data.edge_types  # save original edge types
        data.metapath_dict = {}

        for j, metapath in enumerate(self.metapaths):
            for edge_type in metapath:
                assert data._to_canonical(
                    edge_type) in edge_types, f"'{edge_type}' not present"

            edge_type = metapath[0]
            adj1 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size())

            for edge_type in metapath[1:]:
                adj2 = SparseTensor.from_edge_index(
                    edge_index=data[edge_type].edge_index,
                    sparse_sizes=data[edge_type].size())
                adj1 = adj1 @ adj2

            row, col, _ = adj1.coo()
            new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
            data[new_edge_type].edge_index = torch.vstack([row, col])
            data.metapath_dict[new_edge_type] = metapath

        if self.drop_orig_edges:
            for edge_type in edge_types:
                if self.keep_same_node_type and edge_type[0] == edge_type[-1]:
                    continue
                del data[edge_type]

        return data
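This `__call__` matches the structure of PyG's `T.AddMetaPaths` transform. A minimal usage sketch under that assumption, on an illustrative author/paper graph:

import torch
import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

# Toy heterogeneous graph: 3 authors, 4 papers (illustrative data).
data = HeteroData()
data['author'].x = torch.randn(3, 16)
data['paper'].x = torch.randn(4, 16)
data['author', 'writes', 'paper'].edge_index = torch.tensor([[0, 1, 2], [0, 1, 3]])
data['paper', 'written_by', 'author'].edge_index = torch.tensor([[0, 1, 3], [0, 1, 2]])

# Derive direct author->author edges from the two-hop metapath:
metapaths = [[('author', 'writes', 'paper'), ('paper', 'written_by', 'author')]]
data = T.AddMetaPaths(metapaths)(data)
print(data['author', 'metapath_0', 'author'].edge_index)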
Example #2
    def __call__(self, data: HeteroData) -> HeteroData:
        edge_types = data.edge_types  # save original edge types
        data.metapath_dict = {}

        for j, metapath in enumerate(self.metapaths):
            for edge_type in metapath:
                assert data._to_canonical(
                    edge_type) in edge_types, f"'{edge_type}' not present"

            edge_type = metapath[0]
            edge_weight = self._get_edge_weight(data, edge_type)
            adj1 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)

            if self.max_sample is not None:
                adj1 = self.sample_adj(adj1)

            for edge_type in metapath[1:]:
                edge_weight = self._get_edge_weight(data, edge_type)
                adj2 = SparseTensor.from_edge_index(
                    edge_index=data[edge_type].edge_index,
                    sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)

                adj1 = adj1 @ adj2

                if self.max_sample is not None:
                    adj1 = self.sample_adj(adj1)

            row, col, edge_weight = adj1.coo()
            new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
            data[new_edge_type].edge_index = torch.vstack([row, col])
            if self.weighted:
                data[new_edge_type].edge_weight = edge_weight
            data.metapath_dict[new_edge_type] = metapath

        if self.drop_orig_edges:
            for edge_type in edge_types:
                if self.keep_same_node_type and edge_type[0] == edge_type[-1]:
                    continue
                del data[edge_type]

        # Remove nodes not connected by any remaining edge type:
        if self.drop_unconnected_nodes:
            new_edge_types = data.edge_types
            node_types = data.node_types
            connected_nodes = set()
            for edge_type in new_edge_types:
                connected_nodes.add(edge_type[0])
                connected_nodes.add(edge_type[-1])
            for node_type in node_types:
                if node_type not in connected_nodes:
                    del data[node_type]

        return data
Example #3
    def __init__(self,
                 edge_index: torch.Tensor,
                 sizes: List[int],
                 node_idx: Optional[torch.Tensor] = None,
                 num_nodes: Optional[int] = None,
                 flow: str = "source_to_target",
                 **kwargs):

        N = int(edge_index.max() + 1) if num_nodes is None else num_nodes
        edge_attr = torch.arange(edge_index.size(1))
        adj = SparseTensor.from_edge_index(edge_index,
                                           edge_attr, (N, N),
                                           is_sorted=False)
        adj = adj.t() if flow == 'source_to_target' else adj
        self.adj = adj.to('cpu')

        if node_idx is None:
            node_idx = torch.arange(N)
        elif node_idx.dtype == torch.bool:
            node_idx = node_idx.nonzero().view(-1)

        self.sizes = sizes
        self.flow = flow
        assert self.flow in ['source_to_target', 'target_to_source']

        super().__init__(node_idx.tolist(), collate_fn=self.sample, **kwargs)
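A usage sketch for this sampler, assuming it follows PyG's classic `NeighborSampler` interface; since the `sample` collate function is not shown, the `(batch_size, n_id, adjs)` triple below is that class's standard output format, not something guaranteed by this snippet:

import torch

edge_index = torch.randint(0, 100, (2, 500))  # illustrative random graph

# Sample up to 10 neighbors in the first hop and 5 in the second:
loader = NeighborSampler(edge_index, sizes=[10, 5], num_nodes=100,
                         batch_size=32, shuffle=True)
for batch_size, n_id, adjs in loader:
    # n_id: original indices of all sampled nodes;
    # adjs: one bipartite (edge_index, e_id, size) block per hop.
    pass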
Example #4
def train(epoch):
    model.train()

    pbar = tqdm(total=len(train_loader))
    pbar.set_description(f'Training epoch: {epoch:03d}')

    total_loss = total_examples = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        # Memory-efficient aggregations; explicit sizes keep the adjacency
        # from being truncated when the highest-index node has no edges:
        adj_t = SparseTensor.from_edge_index(
            data.edge_index,
            sparse_sizes=(data.num_nodes, data.num_nodes)).t()
        out = model(data.x, adj_t)[data.train_mask]
        loss = F.cross_entropy(out, data.y[data.train_mask].view(-1))
        loss.backward()
        optimizer.step()

        total_loss += float(loss) * int(data.train_mask.sum())
        total_examples += int(data.train_mask.sum())
        pbar.update(1)

    pbar.close()

    return total_loss / total_examples
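Rebuilding the SparseTensor in every iteration works, but the conversion can also happen once at dataset level. A sketch using PyG's `T.ToSparseTensor` transform; `Planetoid` here is only a stand-in for whatever dataset feeds `train_loader`:

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

# Each `data` object now arrives with a prebuilt, transposed `data.adj_t`,
# making the per-batch `SparseTensor.from_edge_index(...).t()` call unnecessary:
dataset = Planetoid(root='data/Cora', name='Cora', transform=T.ToSparseTensor())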
Example #5
def test_linkx(num_edge_layers):
    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])
    edge_weight = torch.rand(edge_index.size(1))
    adj2 = SparseTensor.from_edge_index(edge_index, edge_weight)
    adj1 = adj2.set_value(None)

    model = LINKX(num_nodes=4,
                  in_channels=16,
                  hidden_channels=32,
                  out_channels=8,
                  num_layers=2,
                  num_edge_layers=num_edge_layers)
    assert str(model) == 'LINKX(num_nodes=4, in_channels=16, out_channels=8)'

    out = model(x, edge_index)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(x, adj1.t()), atol=1e-4)

    out = model(None, edge_index)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(None, adj1.t()), atol=1e-4)

    out = model(x, edge_index, edge_weight)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(x, adj2.t()), atol=1e-4)

    out = model(None, edge_index, edge_weight)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(None, adj2.t()), atol=1e-4)
Example #6
def test_to_hetero_with_bases_and_rgcn_equal_output():
    torch.manual_seed(1234)

    # Run `RGCN` with basis decomposition:
    x = torch.randn(10, 16)  # 6 paper nodes, 4 author nodes
    adj = (torch.rand(10, 10) > 0.5)
    adj[6:, 6:] = False
    edge_index = adj.nonzero(as_tuple=False).t().contiguous()
    row, col = edge_index

    # 0 = paper<->paper, 1 = paper->author, 2 = author->paper
    edge_type = torch.full((edge_index.size(1), ), -1, dtype=torch.long)
    edge_type[(row < 6) & (col < 6)] = 0
    edge_type[(row < 6) & (col >= 6)] = 1
    edge_type[(row >= 6) & (col < 6)] = 2
    assert edge_type.min() == 0

    num_bases = 4
    conv = RGCNConv(16, 32, num_relations=3, num_bases=num_bases, aggr='add')
    out1 = conv(x, edge_index, edge_type)

    # Run `to_hetero_with_bases`:
    x_dict = {
        'paper': x[:6],
        'author': x[6:],
    }
    edge_index_dict = {
        ('paper', '_', 'paper'):
        edge_index[:, edge_type == 0],
        ('paper', '_', 'author'):
        edge_index[:, edge_type == 1] - torch.tensor([[0], [6]]),
        ('author', '_', 'paper'):
        edge_index[:, edge_type == 2] - torch.tensor([[6], [0]]),
    }

    adj_t_dict = {
        key: SparseTensor.from_edge_index(edge_index).t()
        for key, edge_index in edge_index_dict.items()
    }

    metadata = (list(x_dict.keys()), list(edge_index_dict.keys()))
    model = to_hetero_with_bases(RGCN(16, 32), metadata, num_bases=num_bases,
                                 debug=False)

    # Set model weights:
    for i in range(num_bases):
        model.conv.convs[i].lin.weight.data = conv.weight[i].data.t()
        model.conv.convs[i].edge_type_weight.data = conv.comp[:, i].data.t()

    model.lin.weight.data = conv.root.data.t()
    model.lin.bias.data = conv.bias.data

    out2 = model(x_dict, edge_index_dict)
    out2 = torch.cat([out2['paper'], out2['author']], dim=0)
    assert torch.allclose(out1, out2, atol=1e-6)

    out3 = model(x_dict, adj_t_dict)
    out3 = torch.cat([out3['paper'], out3['author']], dim=0)
    assert torch.allclose(out1, out3, atol=1e-6)
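The weight surgery above relies on RGCN's basis decomposition, W_r = Σ_b a_{r,b} · V_b: each `model.conv.convs[i]` holds one basis matrix V_b (copied from `conv.weight[i]`), while `edge_type_weight` stores the per-relation coefficients a_{r,b} (copied from `conv.comp[:, i]`), so both models compute identical messages.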
Example #7
def test_to_hetero_and_rgcn_equal_output():
    torch.manual_seed(1234)

    # Run `RGCN`:
    x = torch.randn(10, 16)  # 6 paper nodes, 4 author nodes
    adj = (torch.rand(10, 10) > 0.5)
    adj[6:, 6:] = False
    edge_index = adj.nonzero(as_tuple=False).t().contiguous()
    row, col = edge_index

    # 0 = paper<->paper, 1 = paper->author, 2 = author->paper
    edge_type = torch.full((edge_index.size(1), ), -1, dtype=torch.long)
    edge_type[(row < 6) & (col < 6)] = 0
    edge_type[(row < 6) & (col >= 6)] = 1
    edge_type[(row >= 6) & (col < 6)] = 2
    assert edge_type.min() == 0

    conv = RGCNConv(16, 32, num_relations=3)
    out1 = conv(x, edge_index, edge_type)

    # Run `to_hetero`:
    x_dict = {
        'paper': x[:6],
        'author': x[6:],
    }
    edge_index_dict = {
        ('paper', '_', 'paper'):
        edge_index[:, edge_type == 0],
        ('paper', '_', 'author'):
        edge_index[:, edge_type == 1] - torch.tensor([[0], [6]]),
        ('author', '_', 'paper'):
        edge_index[:, edge_type == 2] - torch.tensor([[6], [0]]),
    }

    node_types, edge_types = list(x_dict.keys()), list(edge_index_dict.keys())

    adj_t_dict = {
        key: SparseTensor.from_edge_index(edge_index).t()
        for key, edge_index in edge_index_dict.items()
    }

    model = to_hetero(RGCN(16, 32), (node_types, edge_types))

    # Set model weights:
    for i, edge_type in enumerate(edge_types):
        weight = model.conv['__'.join(edge_type)].lin.weight
        weight.data = conv.weight[i].data.t()
    for i, node_type in enumerate(node_types):
        model.lin[node_type].weight.data = conv.root.data.t()
        model.lin[node_type].bias.data = conv.bias.data

    out2 = model(x_dict, edge_index_dict)
    out2 = torch.cat([out2['paper'], out2['author']], dim=0)
    assert torch.allclose(out1, out2, atol=1e-6)

    out3 = model(x_dict, adj_t_dict)
    out3 = torch.cat([out3['paper'], out3['author']], dim=0)
    assert torch.allclose(out1, out3, atol=1e-6)
Example #8
def test_lists_of_SparseTensors():
    e1 = torch.tensor([[4, 1, 3, 2, 2, 3], [1, 3, 2, 3, 3, 2]])
    e2 = torch.tensor([[0, 1, 4, 7, 2, 9], [7, 2, 2, 1, 4, 7]])
    e3 = torch.tensor([[3, 5, 1, 2, 3, 3], [5, 0, 2, 1, 3, 7]])
    e4 = torch.tensor([[0, 1, 9, 2, 0, 3], [1, 1, 2, 1, 3, 2]])
    adj1 = SparseTensor.from_edge_index(e1, sparse_sizes=(11, 11))
    adj2 = SparseTensor.from_edge_index(e2, sparse_sizes=(22, 22))
    adj3 = SparseTensor.from_edge_index(e3, sparse_sizes=(12, 12))
    adj4 = SparseTensor.from_edge_index(e4, sparse_sizes=(15, 15))

    d1 = Data(adj_test=[adj1, adj2])
    d2 = Data(adj_test=[adj3, adj4])

    data_list = [d1, d2]
    dataset = MyTestDataset3(data_list)
    assert len(dataset) == 2
    assert dataset[0].adj_test[0].sparse_sizes() == (11, 11)
    assert dataset[0].adj_test[1].sparse_sizes() == (22, 22)
    assert dataset[1].adj_test[0].sparse_sizes() == (12, 12)
    assert dataset[1].adj_test[1].sparse_sizes() == (15, 15)
Example #9
    def __init__(self, edge_index_dict, embedding_dim, metapath, walk_length,
                 context_size, walks_per_node=1, num_negative_samples=1,
                 num_nodes_dict=None, sparse=False):
        super().__init__()

        if num_nodes_dict is None:
            num_nodes_dict = {}
            for keys, edge_index in edge_index_dict.items():
                key = keys[0]
                N = int(edge_index[0].max() + 1)
                num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

                key = keys[-1]
                N = int(edge_index[1].max() + 1)
                num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

        adj_dict = {}
        for keys, edge_index in edge_index_dict.items():
            sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
            adj = SparseTensor.from_edge_index(edge_index, sparse_sizes=sizes)
            adj_dict[keys] = adj

        self.adj_dict = adj_dict
        self.embedding_dim = embedding_dim
        self.metapath = metapath
        assert metapath[0][0] == metapath[-1][-1]
        self.walk_length = walk_length
        self.context_size = context_size
        self.walks_per_node = walks_per_node
        self.num_negative_samples = num_negative_samples
        self.num_nodes_dict = num_nodes_dict

        types = {x[0] for x in metapath} | {x[-1] for x in metapath}
        types = sorted(types)

        count = 0
        self.start, self.end = {}, {}
        for key in types:
            self.start[key] = count
            count += num_nodes_dict[key]
            self.end[key] = count

        offset = [self.start[metapath[0][0]]]
        offset += [self.start[keys[-1]] for keys in metapath
                   ] * int((walk_length / len(metapath)) + 1)
        offset = offset[:walk_length + 1]
        assert len(offset) == walk_length + 1
        self.register_buffer('offset', torch.tensor(offset))

        self.embedding = Embedding(count, embedding_dim, sparse=sparse)

        self.reset_parameters()
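A usage sketch for this constructor on an illustrative author/paper graph; the assert above requires the metapath to start and end at the same node type, and the final call assumes PyG's `MetaPath2Vec.forward`, which takes a node type:

metapath = [
    ('author', 'writes', 'paper'),
    ('paper', 'written_by', 'author'),
]
model = MetaPath2Vec(data.edge_index_dict, embedding_dim=128,
                     metapath=metapath, walk_length=20, context_size=5,
                     walks_per_node=3, num_negative_samples=1, sparse=True)
z = model('author')  # embeddings of all 'author' nodes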
Example #10
    def extract(
        self,
        data: Data,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:

        adj_t = SparseTensor.from_edge_index(
            data.edge_index,
            sparse_sizes=(data.num_nodes, data.num_nodes),
        ).t()

        n_mask = torch.eye(data.num_nodes, device=data.edge_index.device)
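        # Propagate the identity mask `num_hops` times: afterwards,
        # n_mask[v, u] > 0 iff v is reachable from u within `num_hops` steps.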
        for _ in range(self.num_hops):
            n_mask += adj_t @ n_mask

        return self.map(data, n_mask > 0)
Example #11
def test_wl_conv():
    x1 = torch.tensor([1, 0, 0, 1])
    x2 = F.one_hot(x1).to(torch.float)
    edge_index = torch.tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]])
    adj_t = SparseTensor.from_edge_index(edge_index).t()

    conv = WLConv()
    assert str(conv) == 'WLConv()'

    out = conv(x1, edge_index)
    assert out.tolist() == [0, 1, 1, 0]
    assert conv(x2, edge_index).tolist() == out.tolist()
    assert conv(x1, adj_t).tolist() == out.tolist()
    assert conv(x2, adj_t).tolist() == out.tolist()

    assert conv.histogram(out).tolist() == [[2, 2]]
    assert torch.allclose(conv.histogram(out, norm=True),
                          torch.tensor([[0.7071, 0.7071]]))
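The normalized histogram is simply the L2-normalized color-count vector: [2, 2] / ‖[2, 2]‖₂ = [2/√8, 2/√8] ≈ [0.7071, 0.7071], which is exactly what the final `allclose` check asserts.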
Example #12
    def forward(self, x, edge_index, edge_attr=None, batch=None):
        if batch is None:
            batch = edge_index.new_zeros(x.size(0))
        num_node = x.size(0)

        k = F.relu(self.lin_2(x))

        A = SparseTensor.from_edge_index(edge_index=edge_index,
                                         edge_attr=edge_attr,
                                         sparse_sizes=(num_node, num_node))
        A_wave = fill_diag(A, 1)

        s = A_wave @ k

        score = s.squeeze()
        perm = topk(score, self.ratio, batch)

        A = self.norm(A)

        K_neighbor = A * k.T
        x_neighbor = K_neighbor @ x

        # ----modified
        deg = A.sum(dim=1)  # row degrees; the Python built-in sum() has no `dim` argument
        deg_inv = deg.pow_(-1)
        deg_inv.masked_fill_(deg_inv == float('inf'), 0.)
        x_neighbor = x_neighbor * deg_inv.view(1, -1).T
        # ----
        x_self = x * k

        x = x_neighbor * (
            1 - self.args.combine_ratio) + x_self * self.args.combine_ratio

        x = x[perm]
        batch = batch[perm]
        edge_index, edge_attr = filter_adj(edge_index,
                                           edge_attr,
                                           perm,
                                           num_nodes=s.size(0))

        return x, edge_index, edge_attr, batch, perm
Example #13
@torch.no_grad()
def test(epoch):
    model.eval()

    y_true = {"train": [], "valid": [], "test": []}
    y_pred = {"train": [], "valid": [], "test": []}

    pbar = tqdm(total=len(test_loader))
    pbar.set_description(f'Evaluating epoch: {epoch:03d}')

    for data in test_loader:
        data = data.to(device)

        # Memory-efficient aggregations (explicit sizes avoid truncation):
        adj_t = SparseTensor.from_edge_index(
            data.edge_index,
            sparse_sizes=(data.num_nodes, data.num_nodes)).t()
        out = model(data.x, adj_t).argmax(dim=-1, keepdim=True)

        for split in ['train', 'valid', 'test']:
            mask = data[f'{split}_mask']
            y_true[split].append(data.y[mask].cpu())
            y_pred[split].append(out[mask].cpu())

        pbar.update(1)

    pbar.close()

    train_acc = evaluator.eval({
        'y_true': torch.cat(y_true['train'], dim=0),
        'y_pred': torch.cat(y_pred['train'], dim=0),
    })['acc']

    valid_acc = evaluator.eval({
        'y_true': torch.cat(y_true['valid'], dim=0),
        'y_pred': torch.cat(y_pred['valid'], dim=0),
    })['acc']

    test_acc = evaluator.eval({
        'y_true': torch.cat(y_true['test'], dim=0),
        'y_pred': torch.cat(y_pred['test'], dim=0),
    })['acc']

    return train_acc, valid_acc, test_acc
Example #14
    def __init__(self,
                 edge_index: torch.Tensor,
                 node_idx: torch.Tensor,
                 sizes: List[int],
                 num_nodes: Optional[int] = None,
                 flow: str = 'source_to_target',
                 **kwargs):

        N = int(edge_index.max() + 1) if num_nodes is None else num_nodes
        edge_attr = torch.arange(edge_index.size(1))
        adj = SparseTensor.from_edge_index(edge_index,
                                           edge_attr, (N, N),
                                           is_sorted=False)
        self.adj = adj.t() if flow == 'source_to_target' else adj

        self.sizes = sizes
        self.flow = flow
        assert self.flow in ['source_to_target', 'target_to_source']

        super().__init__(node_idx.tolist(), collate_fn=self.__gen_batch__,
                         **kwargs)
Example #15
    def __init__(self,
                 edge_index,
                 edge_weight,
                 embedding_dim,
                 walk_length,
                 context_size,
                 walks_per_node=1,
                 p=1,
                 q=1,
                 num_negative_samples=1,
                 num_nodes=None,
                 sparse=False):
        super().__init__(edge_index, embedding_dim, walk_length, context_size,
                         walks_per_node, p, q, num_negative_samples, num_nodes,
                         sparse)

        N = maybe_num_nodes(edge_index, num_nodes)
        self.adj = SparseTensor.from_edge_index(edge_index,
                                                edge_attr=edge_weight,
                                                sparse_sizes=(N, N))
        self.adj = self.adj.to('cpu')
Example #16
    def __dropout_adj__(self, sparse_adj: SparseTensor,
                        dropout_adj_prob: float):
        # number of nodes
        N = sparse_adj.size(0)
        # unpack the sparse adjacency matrix into COO form (edge_index, edge_attr)
        row, col, edge_attr = sparse_adj.coo()
        edge_index = torch.stack([row, col], dim=0)
        # dropout adjacency matrix -> generalization
        edge_index, edge_attr = dropout_adj(edge_index,
                                            edge_attr=edge_attr,
                                            p=dropout_adj_prob,
                                            force_undirected=True,
                                            training=self.training)
        # dropout with force_undirected=True drops self-loops, so add them back:
        edge_index, edge_attr = add_remaining_self_loops(edge_index,
                                                         edge_weight=edge_attr,
                                                         fill_value=0.00,
                                                         num_nodes=N)
        # pack the edge list back into a sparse adjacency matrix
        sparse_adj = SparseTensor.from_edge_index(edge_index,
                                                  edge_attr=edge_attr,
                                                  sparse_sizes=(N, N))

        return sparse_adj
Example #17
    def __call__(self, data: Data) -> Data:
        num_nodes = data.num_nodes
        edge_index, edge_weight = data.edge_index, data.edge_weight

        adj = SparseTensor.from_edge_index(edge_index,
                                           edge_weight,
                                           sparse_sizes=(num_nodes, num_nodes))

        # Compute D^{-1} A:
        deg_inv = 1.0 / adj.sum(dim=1)
        deg_inv[deg_inv == float('inf')] = 0
        adj = adj * deg_inv.view(-1, 1)

        out = adj
        row, col, value = out.coo()
        pe_list = [get_self_loop_attr((row, col), value, num_nodes)]
        for _ in range(self.walk_length - 1):
            out = out @ adj
            row, col, value = out.coo()
            pe_list.append(get_self_loop_attr((row, col), value, num_nodes))
        pe = torch.stack(pe_list, dim=-1)

        data = add_node_attr(data, pe, attr_name=self.attr_name)
        return data
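The stacked features are the diagonals of successive random-walk matrix powers: pe[v, k] is the probability that a walk starting at node v is back at v after k + 1 steps. A dense sanity check of that reading (illustrative only):

import torch

edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
N, walk_length = 3, 4
A = torch.zeros(N, N)
A[edge_index[0], edge_index[1]] = 1.0
P = A / A.sum(dim=1, keepdim=True).clamp(min=1)  # D^{-1} A
pe = torch.stack([torch.matrix_power(P, k).diagonal()
                  for k in range(1, walk_length + 1)], dim=-1)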
Example #18
def test_batch():
    torch_geometric.set_debug(True)

    x1 = torch.tensor([1, 2, 3], dtype=torch.float)
    x1_sp = SparseTensor.from_dense(x1.view(-1, 1))
    e1 = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    adj1 = SparseTensor.from_edge_index(e1)
    s1 = '1'
    array1 = ['1', '2']
    x2 = torch.tensor([1, 2], dtype=torch.float)
    x2_sp = SparseTensor.from_dense(x2.view(-1, 1))
    e2 = torch.tensor([[0, 1], [1, 0]])
    adj2 = SparseTensor.from_edge_index(e2)
    s2 = '2'
    array2 = ['3', '4', '5']
    x3 = torch.tensor([1, 2, 3, 4], dtype=torch.float)
    x3_sp = SparseTensor.from_dense(x3.view(-1, 1))
    e3 = torch.tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]])
    adj3 = SparseTensor.from_edge_index(e3)
    s3 = '3'
    array3 = ['6', '7', '8', '9']

    data1 = Data(x=x1, x_sp=x1_sp, edge_index=e1, adj=adj1, s=s1, array=array1,
                 num_nodes=3)
    data2 = Data(x=x2, x_sp=x2_sp, edge_index=e2, adj=adj2, s=s2, array=array2,
                 num_nodes=2)
    data3 = Data(x=x3, x_sp=x3_sp, edge_index=e3, adj=adj3, s=s3, array=array3,
                 num_nodes=4)

    batch = Batch.from_data_list([data1])
    assert str(batch) == ('Batch(x=[3], edge_index=[2, 4], '
                          'x_sp=[3, 1, nnz=3], adj=[3, 3, nnz=4], s=[1], '
                          'array=[1], num_nodes=3, batch=[3], ptr=[2])')
    assert batch.num_graphs == 1
    assert len(batch) == 9
    assert batch.x.tolist() == [1, 2, 3]
    assert batch.x_sp.to_dense().view(-1).tolist() == batch.x.tolist()
    assert batch.edge_index.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
    edge_index = torch.stack(batch.adj.coo()[:2], dim=0)
    assert edge_index.tolist() == batch.edge_index.tolist()
    assert batch.s == ['1']
    assert batch.array == [['1', '2']]
    assert batch.num_nodes == 3
    assert batch.batch.tolist() == [0, 0, 0]
    assert batch.ptr.tolist() == [0, 3]

    batch = Batch.from_data_list([data1, data2, data3], follow_batch=['s'])

    assert str(batch) == ('Batch(x=[9], edge_index=[2, 12], '
                          'x_sp=[9, 1, nnz=9], adj=[9, 9, nnz=12], s=[3], '
                          's_batch=[3], array=[3], num_nodes=9, batch=[9], '
                          'ptr=[4])')
    assert batch.num_graphs == 3
    assert len(batch) == 10
    assert batch.x.tolist() == [1, 2, 3, 1, 2, 1, 2, 3, 4]
    assert batch.x_sp.to_dense().view(-1).tolist() == batch.x.tolist()
    assert batch.edge_index.tolist() == [[0, 1, 1, 2, 3, 4, 5, 6, 6, 7, 7, 8],
                                         [1, 0, 2, 1, 4, 3, 6, 5, 7, 6, 8, 7]]
    edge_index = torch.stack(batch.adj.coo()[:2], dim=0)
    assert edge_index.tolist() == batch.edge_index.tolist()
    assert batch.s == ['1', '2', '3']
    assert batch.s_batch.tolist() == [0, 1, 2]
    assert batch.array == [['1', '2'], ['3', '4', '5'], ['6', '7', '8', '9']]
    assert batch.num_nodes == 9
    assert batch.batch.tolist() == [0, 0, 0, 1, 1, 2, 2, 2, 2]
    assert batch.ptr.tolist() == [0, 3, 5, 9]

    data = batch[0]
    assert str(data) == ("Data(x=[3], edge_index=[2, 4], x_sp=[3, 1, nnz=3], "
                         "adj=[3, 3, nnz=4], s='1', array=[2], num_nodes=3)")
    data = batch[1]
    assert str(data) == ("Data(x=[2], edge_index=[2, 2], x_sp=[2, 1, nnz=2], "
                         "adj=[2, 2, nnz=2], s='2', array=[3], num_nodes=2)")

    data = batch[2]
    assert str(data) == ("Data(x=[4], edge_index=[2, 6], x_sp=[4, 1, nnz=4], "
                         "adj=[4, 4, nnz=6], s='3', array=[4], num_nodes=4)")

    assert len(batch.index_select([1, 0])) == 2
    assert len(batch.index_select(torch.tensor([1, 0]))) == 2
    assert len(batch.index_select(torch.tensor([True, False]))) == 1
    assert len(batch.index_select(np.array([1, 0], dtype=np.int64))) == 2
    assert len(batch.index_select(np.array([True, False]))) == 1
    assert len(batch[:2]) == 2

    data_list = batch.to_data_list()
    assert len(data_list) == 3

    assert len(data_list[0]) == 7
    assert data_list[0].x.tolist() == [1, 2, 3]
    assert data_list[0].x_sp.to_dense().view(-1).tolist() == [1, 2, 3]
    assert data_list[0].edge_index.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
    edge_index = torch.stack(data_list[0].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[0].edge_index.tolist()
    assert data_list[0].s == '1'
    assert data_list[0].array == ['1', '2']
    assert data_list[0].num_nodes == 3

    assert len(data_list[1]) == 7
    assert data_list[1].x.tolist() == [1, 2]
    assert data_list[1].x_sp.to_dense().view(-1).tolist() == [1, 2]
    assert data_list[1].edge_index.tolist() == [[0, 1], [1, 0]]
    edge_index = torch.stack(data_list[1].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[1].edge_index.tolist()
    assert data_list[1].s == '2'
    assert data_list[1].array == ['3', '4', '5']
    assert data_list[1].num_nodes == 2

    assert len(data_list[2]) == 7
    assert data_list[2].x.tolist() == [1, 2, 3, 4]
    assert data_list[2].x_sp.to_dense().view(-1).tolist() == [1, 2, 3, 4]
    assert data_list[2].edge_index.tolist() == [[0, 1, 1, 2, 2, 3],
                                                [1, 0, 2, 1, 3, 2]]
    edge_index = torch.stack(data_list[2].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[2].edge_index.tolist()
    assert data_list[2].s == '3'
    assert data_list[2].array == ['6', '7', '8', '9']
    assert data_list[2].num_nodes == 4

    torch_geometric.set_debug(False)
Example #19
def test_graph_store_conversion():
    graph_store = MyGraphStore()
    edge_index = get_edge_index(100, 100, 300)
    edge_index = sort_edge_index(edge_index, sort_by_row=False)
    adj = SparseTensor.from_edge_index(edge_index, sparse_sizes=(100, 100))

    coo = (edge_index[0], edge_index[1])
    csr = adj.csr()[:2]
    csc = adj.csc()[-2::-1]

    # Put all edge indices:
    graph_store.put_edge_index(edge_index=coo,
                               edge_type=('v', '1', 'v'),
                               layout='coo',
                               size=(100, 100),
                               is_sorted=True)
    assert graph_store.num_src_nodes(edge_type=('v', '1', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '1', 'v')) == 100

    graph_store.put_edge_index(edge_index=csr,
                               edge_type=('v', '2', 'v'),
                               layout='csr',
                               size=(100, 100))
    assert graph_store.num_src_nodes(edge_type=('v', '2', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '2', 'v')) == 100

    graph_store.put_edge_index(edge_index=csc,
                               edge_type=('v', '3', 'v'),
                               layout='csc',
                               size=(100, 100))
    assert graph_store.num_src_nodes(edge_type=('v', '3', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '3', 'v')) == 100

    def assert_edge_index_equal(expected: torch.Tensor, actual: torch.Tensor):
        assert torch.equal(sort_edge_index(expected), sort_edge_index(actual))

    # Convert to COO:
    row_dict, col_dict, perm_dict = graph_store.coo()
    assert len(row_dict) == len(col_dict) == len(perm_dict) == 3
    for key in row_dict.keys():
        actual = torch.stack((row_dict[key], col_dict[key]))
        assert_edge_index_equal(actual, edge_index)
        assert perm_dict[key] is None

    # Convert to CSR:
    rowptr_dict, col_dict, perm_dict = graph_store.csr()
    assert len(rowptr_dict) == len(col_dict) == len(perm_dict) == 3
    for key in rowptr_dict:
        assert torch.equal(rowptr_dict[key], csr[0])
        assert torch.equal(col_dict[key], csr[1])
        if key == ('v', '1', 'v'):
            assert perm_dict[key] is not None

    # Convert to CSC:
    row_dict, colptr_dict, perm_dict = graph_store.csc()
    assert len(row_dict) == len(colptr_dict) == len(perm_dict) == 3
    for key in row_dict:
        assert torch.equal(row_dict[key], csc[0])
        assert torch.equal(colptr_dict[key], csc[1])
        assert perm_dict[key] is None

    # Ensure that the 'edge_types' parameter works as intended:
    def _tensor_eq(expected: List[OptTensor], actual: List[OptTensor]):
        for tensor_expected, tensor_actual in zip(expected, actual):
            if tensor_expected is None or tensor_actual is None:
                if tensor_expected is not tensor_actual:
                    return False
            elif not torch.equal(tensor_expected, tensor_actual):
                return False
        return True

    edge_types = [('v', '1', 'v'), ('v', '2', 'v')]
    assert _tensor_eq(
        list(graph_store.coo()[0].values())[:-1],
        graph_store.coo(edge_types=edge_types)[0].values())
    assert _tensor_eq(
        list(graph_store.csr()[0].values())[:-1],
        graph_store.csr(edge_types=edge_types)[0].values())
    assert _tensor_eq(
        list(graph_store.csc()[0].values())[:-1],
        graph_store.csc(edge_types=edge_types)[0].values())
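For reference, the three layouts round-tripped above all encode the same adjacency; a minimal sketch of how they relate via `torch_sparse`:

import torch
from torch_sparse import SparseTensor

edge_index = torch.tensor([[0, 0, 1], [1, 2, 2]])
adj = SparseTensor.from_edge_index(edge_index, sparse_sizes=(3, 3))

r, c, _ = adj.coo()          # COO: explicit (row, col) pairs
rowptr, col, _ = adj.csr()   # CSR: compressed row pointers, explicit columns
colptr, row, _ = adj.csc()   # CSC: compressed column pointers, explicit rows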
Example #20
def main():
    parser = argparse.ArgumentParser(description="OGBL-COLLAB (GNN)")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--log_steps", type=int, default=1)
    parser.add_argument("--use_sage", action="store_true")
    parser.add_argument("--use_valedges_as_input", action="store_true")
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--hidden_channels", type=int, default=256)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--batch_size", type=int, default=64 * 1024)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=400)
    parser.add_argument("--eval_steps", type=int, default=1)
    parser.add_argument("--runs", type=int, default=1)
    parser.add_argument("--seed",type=int,default=1)
    args = parser.parse_args()
    print(args)
    
    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name="ogbl-collab")
    data = dataset[0]
    edge_index = data.edge_index
    data.edge_weight = data.edge_weight.view(-1).to(torch.float)
    data = T.ToSparseTensor()(data)

    split_edge = dataset.get_edge_split()

    # Use training + validation edges for inference on test set.
    if args.use_valedges_as_input:
        val_edge_index = split_edge["valid"]["edge"].t()
        full_edge_index = torch.cat([edge_index, val_edge_index], dim=-1)
        data.full_adj_t = SparseTensor.from_edge_index(full_edge_index).t()
        data.full_adj_t = data.full_adj_t.to_symmetric()
    else:
        data.full_adj_t = data.adj_t

    data = data.to(device)

    if args.use_sage:
        model = SAGE(
            data.num_features,
            args.hidden_channels,
            args.hidden_channels,
            args.num_layers,
            args.dropout,
        ).to(device)
    else:
        model = GCN(
            data.num_features,
            args.hidden_channels,
            args.hidden_channels,
            args.num_layers,
            args.dropout,
        ).to(device)

    predictor = LinkPredictor(
        args.hidden_channels, args.hidden_channels, 1, args.num_layers, args.dropout
    ).to(device)

    evaluator = Evaluator(name="ogbl-collab")
    loggers = {
        "Hits@10": Logger(args.runs, args),
        "Hits@50": Logger(args.runs, args),
        "Hits@100": Logger(args.runs, args),
    }

    for run in tqdm(range(args.runs)):
        torch.manual_seed(args.seed + run)
        np.random.seed(args.seed + run)
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(
            list(model.parameters()) + list(predictor.parameters()), lr=args.lr
        )

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, data, split_edge, optimizer, args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(
                    model, predictor, data, split_edge, evaluator, args.batch_size
                )
                for key, result in results.items():
                    loggers[key].add_result(run, result)

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits = result
                        print(key)
                        print(
                            f"Run: {run + 1:02d}, "
                            f"Epoch: {epoch:02d}, "
                            f"Loss: {loss:.4f}, "
                            f"Train: {100 * train_hits:.2f}%, "
                            f"Valid: {100 * valid_hits:.2f}%, "
                            f"Test: {100 * test_hits:.2f}%"
                        )
                    print("---")

        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        loggers[key].print_statistics()
Example #21
def test_custom_neighbor_loader(FeatureStore, GraphStore):
    # Initialize feature store, graph store, and reference:
    feature_store = FeatureStore()
    graph_store = GraphStore()
    data = HeteroData()

    # Set up node features:
    x = torch.arange(100)
    data['paper'].x = x
    feature_store.put_tensor(x, group_name='paper', attr_name='x', index=None)

    x = torch.arange(100, 300)
    data['author'].x = x
    feature_store.put_tensor(x, group_name='author', attr_name='x', index=None)

    # Set up edge indices:

    # COO:
    edge_index = get_edge_index(100, 100, 500)
    data['paper', 'to', 'paper'].edge_index = edge_index
    coo = (edge_index[0], edge_index[1])
    graph_store.put_edge_index(edge_index=coo,
                               edge_type=('paper', 'to', 'paper'),
                               layout='coo',
                               size=(100, 100))

    # CSR:
    edge_index = get_edge_index(100, 200, 1000)
    data['paper', 'to', 'author'].edge_index = edge_index
    csr = SparseTensor.from_edge_index(edge_index).csr()[:2]
    graph_store.put_edge_index(edge_index=csr,
                               edge_type=('paper', 'to', 'author'),
                               layout='csr',
                               size=(100, 200))

    # CSC:
    edge_index = get_edge_index(200, 100, 1000)
    data['author', 'to', 'paper'].edge_index = edge_index
    csc = SparseTensor(row=edge_index[1], col=edge_index[0]).csr()[-2::-1]
    graph_store.put_edge_index(edge_index=csc,
                               edge_type=('author', 'to', 'paper'),
                               layout='csc',
                               size=(200, 100))

    # COO (sorted):
    edge_index = get_edge_index(200, 200, 100)
    edge_index = edge_index[:, edge_index[1].argsort()]
    data['author', 'to', 'author'].edge_index = edge_index
    coo = (edge_index[0], edge_index[1])
    graph_store.put_edge_index(edge_index=coo,
                               edge_type=('author', 'to', 'author'),
                               layout='coo',
                               size=(200, 200),
                               is_sorted=True)

    # Construct neighbor loaders:
    loader1 = NeighborLoader(data,
                             batch_size=20,
                             input_nodes=('paper', range(100)),
                             num_neighbors=[-1] * 2)

    loader2 = NeighborLoader((feature_store, graph_store),
                             batch_size=20,
                             input_nodes=('paper', range(100)),
                             num_neighbors=[-1] * 2)

    assert str(loader1) == str(loader2)
    assert len(loader1) == len(loader2)

    for batch1, batch2 in zip(loader1, loader2):
        assert len(batch1) == len(batch2)
        assert batch1['paper'].batch_size == batch2['paper'].batch_size

        # Mapped indices of neighbors may be differently sorted:
        assert torch.allclose(batch1['paper'].x.sort()[0],
                              batch2['paper'].x.sort()[0])
        assert torch.allclose(batch1['author'].x.sort()[0],
                              batch2['author'].x.sort()[0])

        assert (batch1['paper', 'to', 'paper'].edge_index.size() == batch2[
            'paper', 'to', 'paper'].edge_index.size())
        assert (batch1['paper', 'to', 'author'].edge_index.size() == batch2[
            'paper', 'to', 'author'].edge_index.size())
        assert (batch1['author', 'to', 'paper'].edge_index.size() == batch2[
            'author', 'to', 'paper'].edge_index.size())
Example #22
    def __create_edge_index_for_cluster__(self,
                                          cluster_info: list,
                                          modeling_type: str = "fine"):
        my_key = f"{str(cluster_info)}_{modeling_type}"

        if my_key in self.__conn_dict__:
            return  # no need for further processing

        cluster_info_list = []
        custom_batch = None
        if modeling_type == "coarse":
            # first: metric count, second: component name, third: group name, fourth: node name
            cluster_info_list = [el[1:] for el in cluster_info]
            custom_batch = list(range(len(cluster_info_list)))
        else:
            custom_batch = []
            for i, el in enumerate(cluster_info):
                # first: metric count, second: component name, third: group name, fourth: node name
                cluster_info_list += (el[0] * [el[1:]])
                custom_batch += el[0] * [i]

        custom_batch = torch.tensor(custom_batch, dtype=torch.long)

        edge_index_list = []
        edge_attr_list = []
        # each tuple has the following structure: (component name, group name, node name)
        for i, tuple_i in enumerate(cluster_info_list):
            for j, tuple_j in enumerate(cluster_info_list):
                # identity (self-loop)
                if i == j:
                    edge_index_list.append((i, j))
                    edge_attr_list.append(self.__get_edge_type__("identity"))
                # system of tuple_i is hosted on tuple_j
                elif tuple_i[2] == tuple_j[1]:
                    edge_index_list.append((i, j))
                    edge_attr_list.append(self.__get_edge_type__("guest-host"))
                # system of tuple_i is hosting tuple_j
                elif tuple_i[1] == tuple_j[2]:
                    edge_index_list.append((i, j))
                    edge_attr_list.append(self.__get_edge_type__("host-guest"))
                # both systems are from the same group
                elif tuple_i[1] == tuple_j[1]:
                    edge_index_list.append((i, j))
                    edge_attr_list.append(self.__get_edge_type__(tuple_j[1]))
                # both systems are from distinct groups, but there is communication
                elif tuple_j[1] in __node_relation_dict__.get(tuple_i[1], []):
                    edge_index_list.append((i, j))
                    edge_attr_list.append(
                        self.__get_edge_type__(f"{tuple_i[1]}->{tuple_j[1]}"))
                # both systems are from distinct groups, but there is communication
                elif tuple_i[1] in __node_relation_dict__.get(tuple_j[1], []):
                    edge_index_list.append((i, j))
                    edge_attr_list.append(
                        self.__get_edge_type__(f"{tuple_j[1]}->{tuple_i[1]}"))

        edge_index = torch.tensor(edge_index_list,
                                  dtype=torch.long).t().contiguous()

        edge_attr = torch.tensor(edge_attr_list,
                                 dtype=torch.double).contiguous()

        N = len(cluster_info_list)
        adj = SparseTensor.from_edge_index(edge_index,
                                           edge_attr=edge_attr,
                                           sparse_sizes=(N, N))

        self.__conn_dict__[my_key] = (adj, custom_batch)
Example #23
def main():
    parser = argparse.ArgumentParser(description='OGBL-COLLAB (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--use_valedges_as_input', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=64 * 1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=400)
    parser.add_argument('--eval_steps', type=int, default=1)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name='ogbl-collab')
    data = dataset[0]
    edge_index = data.edge_index
    data.edge_weight = data.edge_weight.view(-1).to(torch.float)
    data = T.ToSparseTensor()(data)

    split_edge = dataset.get_edge_split()

    # Use training + validation edges for inference on test set.
    if args.use_valedges_as_input:
        val_edge_index = split_edge['valid']['edge'].t()
        full_edge_index = torch.cat([edge_index, val_edge_index], dim=-1)
        data.full_adj_t = SparseTensor.from_edge_index(full_edge_index).t()
        data.full_adj_t = data.full_adj_t.to_symmetric()
    else:
        data.full_adj_t = data.adj_t

    data = data.to(device)

    if args.use_sage:
        model = SAGE(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers,
                     args.dropout).to(device)
    else:
        model = GCN(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout).to(device)

    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbl-collab')
    loggers = {
        'Hits@10': Logger(args.runs, args),
        'Hits@50': Logger(args.runs, args),
        'Hits@100': Logger(args.runs, args),
    }

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(predictor.parameters()),
                                     lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, data, split_edge, optimizer,
                         args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(model, predictor, data, split_edge, evaluator,
                               args.batch_size)
                for key, result in results.items():
                    loggers[key].add_result(run, result)

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits = result
                        print(key)
                        print(f'Run: {run + 1:02d}, '
                              f'Epoch: {epoch:02d}, '
                              f'Loss: {loss:.4f}, '
                              f'Train: {100 * train_hits:.2f}%, '
                              f'Valid: {100 * valid_hits:.2f}%, '
                              f'Test: {100 * test_hits:.2f}%')
                    print('---')

        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        loggers[key].print_statistics()