コード例 #1
0
def test_neighbor_sampler():
    """Compare ``neighbor_sampler`` against fixed outputs under a fixed seed.

    The sampler is called twice on the same inputs, first with a float
    ``size`` (``1.0``) and then with an integer ``size`` (``3``). Both calls
    share one seeded RNG stream, so their order must not be swapped.
    """
    torch.manual_seed(1234)

    seed_nodes = torch.tensor([0, 1])
    cum_degree = torch.tensor([0, 3, 7])

    # Float size.
    expected_float = [0, 2, 1, 5, 6, 3, 4]
    sampled = neighbor_sampler(seed_nodes, cum_degree, size=1.0)
    assert sampled.tolist() == expected_float

    # Integer size.
    expected_int = [1, 0, 2, 4, 5, 6]
    sampled = neighbor_sampler(seed_nodes, cum_degree, size=3)
    assert sampled.tolist() == expected_int
コード例 #2
0
    def __produce_subgraph__(self, b_id):
        r"""Produces a :obj:`Data` object holding the subgraph data for a given
        mini-batch :obj:`b_id`.

        Args:
            b_id (Tensor): Node indices of the mini-batch. They seed the first
                sampling hop.

        Returns:
            Data: Contains the de-duplicated, locally re-indexed
            :obj:`edge_index`, one edge identifier :obj:`e_id` per remaining
            edge, the sampled node indices :obj:`n_id`, the given
            :obj:`b_id`, the local positions :obj:`sub_b_id` of the batch
            nodes and :obj:`num_nodes`.
        """

        n_ids = [b_id]
        e_ids = []
        edge_indices = []

        for hop in range(self.num_hops):
            # Sample edges around the nodes reached in the previous hop.
            e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
            n_id = self.edge_index_j.index_select(0, e_id)
            n_id = n_id.unique(sorted=False)
            n_ids.append(n_id)
            # Translate sampler edge positions through `self.e_assoc` before
            # selecting columns of `self.data.edge_index`.
            e_ids.append(self.e_assoc.index_select(0, e_id))
            edge_index = self.data.edge_index.index_select(1, e_ids[-1])
            edge_indices.append(edge_index)

        n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
        # `self.tmp` acts as a lookup table: original node index -> position
        # of that node inside `n_id`.
        self.tmp[n_id] = torch.arange(n_id.size(0))
        e_id = torch.cat(e_ids, dim=0)
        edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

        # Encode every edge as a single integer so that duplicates collected
        # across hops can be removed with one `unique` call.
        num_nodes = n_id.size(0)
        idx = edge_index[0] * num_nodes + edge_index[1]
        idx, inv = idx.unique(sorted=False, return_inverse=True)
        # Floor division keeps the decoded row indices integral; `/` performs
        # true division on integer tensors and would yield a float tensor.
        edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
        # Keep one original edge identifier per de-duplicated edge.
        e_id = e_id.new_zeros(edge_index.size(1)).scatter_(0, inv, e_id)

        return Data(edge_index=edge_index,
                    e_id=e_id,
                    n_id=n_id,
                    b_id=b_id,
                    sub_b_id=self.tmp[b_id],
                    num_nodes=num_nodes)
コード例 #3
0
ファイル: sampler.py プロジェクト: shigen97/SELAR
    def __produce_subgraph__(self, data):
        r"""Produces a :obj:`Data` object holding the subgraph data for a given
        mini-batch :obj:`data`.

        Args:
            data (sequence): The first two entries are the node index tensors
                :obj:`b_id` and :obj:`u_id`; the third entry holds the labels
                and is passed through unchanged.

        Returns:
            Data: Contains the locally re-indexed :obj:`edge_index`, the
            sampled node indices :obj:`n_id`, the local positions of
            :obj:`b_id` (:obj:`target_items`) and :obj:`u_id`
            (:obj:`target_users`), and :obj:`labels`.
        """

        b_id, u_id = data[:2]
        labels = data[2]

        # Both index sets seed the first sampling hop.
        n_ids = [torch.cat((b_id, u_id))]
        edge_indices = []

        for hop in range(self.num_hops):
            e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
            n_id = self.edge_index_j.index_select(0, e_id)
            n_ids.append(n_id.unique(sorted=False))

            # Translate sampler edge positions through `self.e_assoc` before
            # selecting columns of `self.edge_index`; only its first two rows
            # are kept.
            orig_e_id = self.e_assoc.index_select(0, e_id)
            edge_index = self.edge_index.index_select(1, orig_e_id)
            edge_indices.append(edge_index[:2, :])

        # All distinct nodes touched by the sampling form the subgraph.
        n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
        # `self.tmp` acts as a lookup table: original node index -> position
        # of that node inside `n_id`.
        self.tmp[n_id] = torch.arange(n_id.size(0))
        edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

        return Data(edge_index=edge_index, n_id=n_id,
                    target_items=self.tmp[b_id], target_users=self.tmp[u_id], labels=labels)
コード例 #4
0
    def __produce_bipartite_data_flow__(self, n_id):
        r"""Produces a :obj:`DataFlow` object with a bipartite assignment
        matrix for a given mini-batch :obj:`n_id`.

        Args:
            n_id (Tensor): Node indices of the mini-batch. They seed the first
                sampling hop.

        Returns:
            DataFlow: One appended entry per hop, each made of the new node
            indices, the positions of the previous nodes inside them
            (:obj:`None` without self-loops), the edge identifiers
            (:obj:`None` when self-loops are added but were not all present
            initially) and the locally indexed :obj:`edge_index`.
        """
        data_flow = DataFlow(n_id, self.flow)

        for hop in range(self.num_hops):
            e_id = neighbor_sampler(n_id, self.cumdeg, self.size[hop])

            new_n_id = self.edge_index_j.index_select(0, e_id)
            # Translate sampler edge positions through `self.e_assoc`.
            e_id = self.e_assoc[e_id]

            if self.add_self_loops:
                # Append the current nodes so they are part of the next layer;
                # the tail of the inverse mapping gives their new positions.
                new_n_id = torch.cat([new_n_id, n_id], dim=0)
                new_n_id, inv = new_n_id.unique(sorted=False,
                                                return_inverse=True)
                res_n_id = inv[-n_id.size(0):]
            else:
                new_n_id = new_n_id.unique(sorted=False)
                res_n_id = None

            edges = [None, None]
            edge_index_i = self.edge_index[self.i, e_id]
            if self.add_self_loops:
                edge_index_i = torch.cat([edge_index_i, n_id], dim=0)

            # `self.tmp` is reused as an index lookup table: first for the
            # current nodes (side `i`), then for the new nodes (side `j`).
            # The write/read order below must therefore be kept.
            self.tmp[n_id] = torch.arange(n_id.size(0))
            edges[self.i] = self.tmp[edge_index_i]

            edge_index_j = self.edge_index[self.j, e_id]
            if self.add_self_loops:
                edge_index_j = torch.cat([edge_index_j, n_id], dim=0)

            self.tmp[new_n_id] = torch.arange(new_n_id.size(0))
            edges[self.j] = self.tmp[edge_index_j]

            edge_index = torch.stack(edges, dim=0)

            e_id = self.e_id[e_id]
            if self.add_self_loops:
                if self.edge_index_loop.size(1) == self.data.num_nodes:
                    # Only set `e_id` if all self-loops were initially passed
                    # to the graph.
                    e_id = torch.cat([e_id, self.e_id_loop[n_id]])
                else:
                    e_id = None
                    if torch_geometric.is_debug_enabled():
                        # The adjacent literals are concatenated, so each
                        # fragment carries its own trailing space.
                        warnings.warn(
                            ('Could not add edge identifiers to the DataFlow '
                             'object due to missing initial self-loops. '
                             'Please make sure that your graph already '
                             'contains self-loops in case you want to use '
                             'edge-conditioned operators.'))

            n_id = new_n_id

            data_flow.append(n_id, res_n_id, e_id, edge_index)

        return data_flow
コード例 #5
0
ファイル: sampler.py プロジェクト: shigen97/SELAR
    def __produce_subgraph__(self, data, h_item):
        r"""Produces a :obj:`Data` object holding the subgraph data for a given
        mini-batch :obj:`data`, optionally extended by hint edges.

        Args:
            data (sequence): The first two entries are the node index tensors
                :obj:`b_id` and :obj:`u_id`; the third entry holds the labels
                and is passed through unchanged.
            h_item: Hub nodes used when :obj:`self.use_hint == 'True'`. Each
                element must support :obj:`.item()` (presumably a tensor of
                node indices -- confirm against the caller).

        Returns:
            Data: Contains the de-duplicated, locally re-indexed
            :obj:`edge_index`, the sampled node indices :obj:`n_id`, the
            local positions of :obj:`b_id` (:obj:`target_items`) and
            :obj:`u_id` (:obj:`target_users`), and :obj:`labels`. When hints
            are enabled it additionally holds :obj:`h_edge_index`, i.e.
            :obj:`edge_index` plus the hint edges, de-duplicated.
        """

        b_id, u_id = data[:2]
        labels = data[2]
        # The flag is compared against the string 'True', not a boolean.
        use_hint = self.use_hint == 'True'

        # Both index sets seed the first sampling hop.
        n_ids = [torch.cat((b_id, u_id))]
        edge_indices = []

        for hop in range(self.num_hops):
            e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
            n_id = self.edge_index_j.index_select(0, e_id)
            n_id = n_id.unique(sorted=False)
            n_ids.append(n_id)

            # Translate sampler edge positions through `self.e_assoc` before
            # selecting columns of `self.edge_index`; only its first two rows
            # are kept.
            orig_e_id = self.e_assoc.index_select(0, e_id)
            edge_index = self.edge_index.index_select(1, orig_e_id)
            edge_indices.append(edge_index[:2, :])

        if use_hint:
            h_edge_list = []
            h_n_id = []
            for hub in h_item:
                # Row-0 endpoints of all edges whose row-1 endpoint is `hub`.
                users = self.edge_index[0, self.edge_index[1, :] == hub]
                for urs in users:
                    # NOTE(review): `n_id` here is the node set of the last
                    # sampled hop only, not of all hops -- confirm intended.
                    if urs in n_id:
                        h_edge_list.append([urs.item(), hub.item()])
                        h_n_id.append(urs.item())
            # An explicit integer dtype keeps the tensor usable as an index
            # even when no hint edge was found (an empty list would otherwise
            # become a float tensor).
            n_ids.append(torch.tensor(h_n_id, dtype=torch.long))
            n_ids.append(torch.tensor(h_item))

        n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
        # `self.tmp` acts as a lookup table: original node index -> position
        # of that node inside `n_id`.
        self.tmp[n_id] = torch.arange(n_id.size(0))
        edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

        # Encode every edge as a single integer so that duplicates collected
        # across hops can be removed with one `unique` call.
        num_nodes = n_id.size(0)
        idx = edge_index[0] * num_nodes + edge_index[1]
        idx, _ = idx.unique(sorted=False, return_inverse=True)
        edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)

        if use_hint:
            # Shape (2, K); the view keeps this well-formed for K == 0.
            hint_edges = torch.tensor(h_edge_list, dtype=torch.long).view(-1, 2).t()
            h_edge_index = torch.cat((edge_index, self.tmp[hint_edges]), 1)
            idx = h_edge_index[0] * num_nodes + h_edge_index[1]
            idx, _ = idx.unique(sorted=False, return_inverse=True)
            h_edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
            return Data(edge_index=edge_index, h_edge_index=h_edge_index, n_id=n_id, target_items=self.tmp[b_id], target_users=self.tmp[u_id], labels=labels)
        else:
            return Data(edge_index=edge_index, n_id=n_id, target_items=self.tmp[b_id], target_users=self.tmp[u_id], labels=labels)
コード例 #6
0
    def __produce_bipartite_data_flow__(self, n_id):
        r"""Builds the :obj:`DataFlow` of bipartite assignment matrices that
        results from sampling :obj:`self.num_hops` hops around the mini-batch
        nodes :obj:`n_id`."""
        flow = DataFlow(n_id, self.flow)

        for hop in range(self.num_hops):
            sampled = neighbor_sampler(n_id, self.cumdeg, self.size[hop])

            next_n_id = self.edge_index_j.index_select(0, sampled)
            # Sampler edge positions translated through `self.e_assoc`.
            assoc = self.e_assoc[sampled]

            if not self.add_self_loops:
                next_n_id = next_n_id.unique(sorted=False)
                res_n_id = None
            else:
                # The current nodes are appended last, so the tail of the
                # inverse mapping holds their positions in the next layer.
                merged = torch.cat([next_n_id, n_id], dim=0)
                next_n_id, inverse = merged.unique(sorted=False,
                                                   return_inverse=True)
                res_n_id = inverse[-n_id.size(0):]

            side_i = self.edge_index[self.i, assoc]
            side_j = self.edge_index[self.j, assoc]
            if self.add_self_loops:
                side_i = torch.cat([side_i, n_id], dim=0)
                side_j = torch.cat([side_j, n_id], dim=0)

            # `self.tmp` serves as an index lookup table twice in a row:
            # first for the current nodes, then for the next-layer nodes.
            local = [None, None]
            self.tmp[n_id] = torch.arange(n_id.size(0))
            local[self.i] = self.tmp[side_i]
            self.tmp[next_n_id] = torch.arange(next_n_id.size(0))
            local[self.j] = self.tmp[side_j]
            edge_index = torch.stack(local, dim=0)

            e_id = self.e_id[assoc]
            if self.add_self_loops:
                e_id = torch.cat([e_id, self.e_id_loop[n_id]])

            n_id = next_n_id
            flow.append(n_id, res_n_id, e_id, edge_index)

        return flow
コード例 #7
0
ファイル: data_sampler.py プロジェクト: zyq2016/SuperGAT
    def __produce_subgraph__(self, b_id):
        r"""Produces a :obj:`Data` object holding the subgraph data for a given
        mini-batch :obj:`b_id`.

        Args:
            b_id (Tensor): Node indices of the mini-batch. They seed the first
                sampling hop.

        Returns:
            Data: Contains the de-duplicated, locally re-indexed
            :obj:`edge_index`, one edge identifier :obj:`e_id` per remaining
            edge, the sampled node indices :obj:`n_id`, the given
            :obj:`b_id`, the local positions :obj:`sub_b_id` of the batch
            nodes, :obj:`full_edge_index` (:obj:`None` unless
            :obj:`self.use_negative_sampling` is set) and :obj:`num_nodes`.
        """

        n_ids = [b_id]
        e_ids = []
        edge_indices = []

        for hop in range(self.num_hops):
            # Sample edges around the nodes reached in the previous hop.
            e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
            n_id = self.edge_index_j.index_select(0, e_id)
            n_id = n_id.unique(sorted=False)
            n_ids.append(n_id)
            # Translate sampler edge positions through `self.e_assoc` before
            # selecting columns of `self.data.edge_index`.
            e_ids.append(self.e_assoc.index_select(0, e_id))
            edge_index = self.data.edge_index.index_select(1, e_ids[-1])
            edge_indices.append(edge_index)

        n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
        # `self.tmp` acts as a lookup table: original node index -> position
        # of that node inside `n_id`.
        self.tmp[n_id] = torch.arange(n_id.size(0))
        e_id = torch.cat(e_ids, dim=0)
        edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

        # Encode every edge as a single integer so that duplicates collected
        # across hops can be removed with one `unique` call.
        num_nodes = n_id.size(0)
        idx = edge_index[0] * num_nodes + edge_index[1]
        idx, inv = idx.unique(sorted=False, return_inverse=True)
        # Floor division keeps the decoded row indices integral; `/` performs
        # true division on integer tensors and would yield a float tensor.
        edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
        # Keep one original edge identifier per de-duplicated edge.
        e_id = e_id.new_zeros(edge_index.size(1)).scatter_(0, inv, e_id)

        # n_id: original ID of nodes in the whole sub-graph.
        # b_id: original ID of nodes in the training graph.
        # sub_b_id: sampled ID of nodes in the training graph.

        # Get full-subgraph for negative sampling.
        # Will be deleted at __call__.
        if self.use_negative_sampling:
            adj, _ = self.adj.saint_subgraph(n_id)
            row, col, _ = adj.coo()
            full_edge_index = torch.stack([row, col], dim=0)
        else:
            full_edge_index = None

        return Data(edge_index=edge_index, e_id=e_id, n_id=n_id, b_id=b_id,
                    sub_b_id=self.tmp[b_id], full_edge_index=full_edge_index, num_nodes=num_nodes)
コード例 #8
0
    def __produce__(self, n_id):
        r"""Builds the :obj:`DataFlow` that results from sampling
        :obj:`self.num_hops` hops around the mini-batch nodes :obj:`n_id`."""

        flow = DataFlow(n_id, self.flow)

        for hop in range(self.num_hops):
            sampled = neighbor_sampler(n_id, self.cumdeg, self.size[hop])

            candidates = self.edge_index_j.index_select(0, sampled)
            if self.add_self_loops:
                # Keep the current nodes in the next layer as well.
                candidates = torch.cat([candidates, n_id], dim=0)
            next_n_id = candidates.unique(sorted=False)

            # Sampler edge positions translated through `self.e_assoc`.
            e_id = self.e_assoc[sampled]

            side_i = self.data.edge_index[self.i, e_id]
            side_j = self.data.edge_index[self.j, e_id]
            if self.add_self_loops:
                side_i = torch.cat([side_i, n_id], dim=0)
                side_j = torch.cat([side_j, n_id], dim=0)

            # `self.tmp` serves as an index lookup table twice in a row:
            # first for the current nodes, then for the next-layer nodes.
            local = [None, None]
            self.tmp[n_id] = torch.arange(n_id.size(0))
            local[self.i] = self.tmp[side_i]
            self.tmp[next_n_id] = torch.arange(next_n_id.size(0))
            local[self.j] = self.tmp[side_j]
            edge_index = torch.stack(local, dim=0)

            # Edge identifiers are dropped once self-loops are added, to
            # prevent them from being misused.
            if self.add_self_loops:
                e_id = None

            n_id = next_n_id
            flow.append(n_id, e_id, edge_index)

        return flow
コード例 #9
0
    def __produce_bipartite_data_flow__(self, n_id):
        r"""Builds the :obj:`DataFlow` of bipartite assignment matrices that
        results from sampling :obj:`self.num_hops` hops around the mini-batch
        nodes :obj:`n_id`, accumulating every visited node along the way."""

        flow = DataFlow(n_id)

        seen_n_id = n_id
        for hop in range(self.num_hops):
            e_id = neighbor_sampler(n_id, self.cumdeg, self.nsample[hop])
            sampled_edges = self.edge_index[:, e_id].to(torch.long)

            # Both rows are extended with the nodes accumulated *before* this
            # hop's new nodes are added.
            ext_row_0 = torch.cat([sampled_edges[0], seen_n_id])
            ext_row_1 = torch.cat([sampled_edges[1], seen_n_id])

            # Row 1 is renumbered against the previously accumulated nodes.
            local_row_1 = self.__renumerate__(ext_row_1, seen_n_id)

            n_id = sampled_edges[0].unique(sorted=False)

            # Target nodes are placed first, so the previous size marks where
            # the newly sampled nodes start.
            res_size = seen_n_id.size(0)
            seen_n_id = torch.cat([seen_n_id, n_id])

            # Row 0 is renumbered against the extended node set.
            local_row_0 = self.__renumerate__(ext_row_0, seen_n_id)

            edge_index = torch.stack([local_row_0, local_row_1], dim=0)
            flow.append(seen_n_id, res_size, e_id, edge_index)

        return flow