Example #1
0
def test_segregate_self_loops():
    """Verify that ``segregate_self_loops`` separates self-loops from edges.

    The graph used here has three edges: ``0 -> 0`` (the only self-loop),
    ``0 -> 1`` and ``1 -> 0``.  The call is checked once without edge
    attributes (both attribute outputs must be ``None``) and once with
    one scalar attribute per edge (attributes must follow their edges).
    """
    edge_index = torch.tensor([[0, 0, 1], [0, 1, 0]])

    expected_edges = [[0, 1], [1, 0]]
    expected_loops = [[0], [0]]

    # Case 1: no edge attributes supplied.
    edges, attr, loops, loop_attr = segregate_self_loops(edge_index)
    assert edges.tolist() == expected_edges
    assert attr is None
    assert loops.tolist() == expected_loops
    assert loop_attr is None

    # Case 2: attribute 1 belongs to the self-loop, 2 and 3 to the rest.
    edge_attr = torch.tensor([1, 2, 3])
    edges, attr, loops, loop_attr = segregate_self_loops(edge_index,
                                                         edge_attr)
    assert edges.tolist() == expected_edges
    assert attr.tolist() == [2, 3]
    assert loops.tolist() == expected_loops
    assert loop_attr.tolist() == [1]
Example #2
0
    def __init__(self, args, edge_index, num_nodes, size, num_hops,
                 batch_size=1, shuffle=False, drop_last=False, bipartite=True,
                 add_self_loops=False, flow='source_to_target'):
        """Store the sampler configuration and pre-sort the edge list.

        Args:
            args: Namespace-like object; ``args.model_name`` and
                ``args.selarhint`` are read.
            edge_index: 2-D tensor indexed as ``[:2, :]`` for the edge
                endpoints and ``[2, :]`` for a per-edge type row.
            num_nodes: Number of nodes; sizes the per-node buffers.
            size, num_hops, batch_size, shuffle, drop_last: Stored as given
                for use by other methods of the class (not visible here).
            bipartite, add_self_loops: When both are truthy, self-loops are
                split off from ``edge_index`` into separate attributes.
            flow: ``'source_to_target'`` or ``'target_to_source'``; selects
                which endpoint row the edges are sorted by.
        """
        self.model_name = args.model_name
        self.edge_index = edge_index[:2, :]
        self.edge_type = edge_index[2, :]
        self.use_hint = args.selarhint

        # Plain configuration, kept verbatim.
        self.num_nodes = num_nodes
        self.size = size
        self.num_hops = num_hops
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.bipartite = bipartite
        self.add_self_loops = add_self_loops
        self.flow = flow

        # One running id per column of the (still unfiltered) edge list.
        self.e_id = torch.arange(self.edge_index.size(1))

        if bipartite and add_self_loops:
            split = segregate_self_loops(self.edge_index, self.e_id)
            self.edge_index = split[0]
            self.e_id = split[1]
            self.edge_index_loop = split[2]
            # Per-node lookup of the self-loop edge id; -1 marks "no loop".
            loop_ids = self.e_id.new_full((self.num_nodes,), -1)
            loop_ids[split[2][0]] = split[3]
            self.e_id_loop = loop_ids

        assert flow in ['source_to_target', 'target_to_source']
        if flow == 'target_to_source':
            self.i, self.j = 0, 1
        else:
            self.i, self.j = 1, 0

        # Sort edges by endpoint row ``i``; ``e_assoc`` is the permutation.
        sorted_i, self.e_assoc = torch.sort(self.edge_index[self.i])
        self.edge_index_j = self.edge_index[self.j][self.e_assoc]

        # Prefix sums over the per-node counts, with a leading zero.
        deg = degree(sorted_i, self.num_nodes, dtype=torch.long)
        self.cumdeg = torch.cat([deg.new_zeros(1), torch.cumsum(deg, dim=0)])

        # Uninitialised per-node scratch buffer.
        self.tmp = torch.empty(self.num_nodes, dtype=torch.long)
Example #3
0
    def __init__(self, data, size, num_hops, batch_size=1, shuffle=False,
                 drop_last=False, bipartite=True, add_self_loops=False,
                 flow='source_to_target'):
        """Store the sampler configuration and pre-sort ``data.edge_index``.

        Args:
            data: Graph object; ``data.edge_index`` and ``data.num_nodes``
                are read.
            size: Passed through ``repeat(size, num_hops)`` before being
                stored.
            num_hops, batch_size, shuffle, drop_last: Stored as given for
                use by other methods of the class (not visible here).
            bipartite, add_self_loops: When both are truthy, self-loops are
                split off from the edge list into separate attributes.
            flow: ``'source_to_target'`` or ``'target_to_source'``; selects
                which endpoint row the edges are sorted by.

        Raises:
            ImportError: If ``neighbor_sampler`` is ``None``.
        """
        if neighbor_sampler is None:
            raise ImportError('`NeighborSampler` requires `torch-cluster`.')

        self.data = data
        self.size = repeat(size, num_hops)
        self.num_hops = num_hops
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.bipartite = bipartite
        self.add_self_loops = add_self_loops
        self.flow = flow

        self.edge_index = data.edge_index
        # One running id per column of the (still unfiltered) edge list.
        self.e_id = torch.arange(self.edge_index.size(1))

        if bipartite and add_self_loops:
            split = segregate_self_loops(self.edge_index, self.e_id)
            self.edge_index = split[0]
            self.e_id = split[1]
            self.edge_index_loop = split[2]
            # Per-node lookup of the self-loop edge id; -1 marks "no loop".
            loop_ids = self.e_id.new_full((data.num_nodes, ), -1)
            loop_ids[split[2][0]] = split[3]
            self.e_id_loop = loop_ids

        assert flow in ['source_to_target', 'target_to_source']
        if flow == 'target_to_source':
            self.i, self.j = 0, 1
        else:
            self.i, self.j = 1, 0

        # Sort edges by endpoint row ``i``; ``e_assoc`` is the permutation.
        sorted_i, self.e_assoc = torch.sort(self.edge_index[self.i])
        self.edge_index_j = self.edge_index[self.j][self.e_assoc]

        # Prefix sums over the per-node counts, with a leading zero.
        deg = degree(sorted_i, data.num_nodes, dtype=torch.long)
        self.cumdeg = torch.cat([deg.new_zeros(1), torch.cumsum(deg, dim=0)])

        # Uninitialised per-node scratch buffer.
        self.tmp = torch.empty(data.num_nodes, dtype=torch.long)
Example #4
0
def remove_isolated_nodes(edge_index, edge_attr=None, num_nodes=None):
    r"""Removes isolated nodes from the graph given by :attr:`edge_index`
    and relabels the remaining nodes consecutively.

    A node is kept if it is an endpoint of at least one edge that is not a
    self-loop.  Self-loops of kept nodes are appended after the ordinary
    edges; because they are looked up through one slot per node, at most
    one self-loop per kept node is retained.

    Args:
        edge_index (LongTensor): The edge indices.
        edge_attr (Tensor, optional): Per-edge attributes, indexed along
            dimension 0. (default: :obj:`None`)
        num_nodes (int, optional): The number of nodes; resolved through
            :obj:`maybe_num_nodes` when :obj:`None`. (default: :obj:`None`)

    Returns:
        Tuple ``(edge_index, edge_attr, mask)`` where ``mask`` is a boolean
        tensor of shape ``[num_nodes]`` marking the kept nodes, and
        ``edge_attr`` stays :obj:`None` if none was passed in.
    """
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    out = segregate_self_loops(edge_index, edge_attr)
    edge_index, edge_attr, loop_edge_index, loop_edge_attr = out

    # Nodes touched by at least one non-self-loop edge survive.
    mask = torch.zeros(num_nodes, dtype=torch.bool, device=edge_index.device)
    mask[edge_index.view(-1)] = True

    # Old node id -> new consecutive id (-1 for removed nodes).
    assoc = torch.full((num_nodes, ), -1, dtype=torch.long, device=mask.device)
    assoc[mask] = torch.arange(mask.sum(), device=assoc.device)
    edge_index = assoc[edge_index]

    # Keep masks boolean end to end: indexing with uint8 tensors is
    # deprecated, and bool tensors support `&` directly.
    loop_mask = torch.zeros_like(mask)
    loop_mask[loop_edge_index[0]] = True
    loop_mask = loop_mask & mask

    # Node id -> column of its self-loop in `loop_edge_index`.
    loop_assoc = torch.full_like(assoc, -1)
    loop_assoc[loop_edge_index[0]] = torch.arange(loop_edge_index.size(1),
                                                  device=loop_assoc.device)
    loop_idx = loop_assoc[loop_mask]
    loop_edge_index = assoc[loop_edge_index[:, loop_idx]]

    edge_index = torch.cat([edge_index, loop_edge_index], dim=1)

    if edge_attr is not None:
        loop_edge_attr = loop_edge_attr[loop_idx]
        edge_attr = torch.cat([edge_attr, loop_edge_attr], dim=0)

    return edge_index, edge_attr, mask
Example #5
0
def remove_isolated_nodes(edge_index, edge_attr=None, num_nodes=None):
    r"""Drops isolated nodes from the graph described by :attr:`edge_index`
    (optionally carrying :attr:`edge_attr`) and relabels the survivors.

    Alongside the filtered graph, a mask of shape :obj:`[num_nodes]` is
    returned so that node features can be filtered by the caller.
    Self-loops are kept only for nodes that are not isolated.

    Args:
        edge_index (LongTensor): The edge indices.
        edge_attr (Tensor, optional): Edge weights or multi-dimensional
            edge features. (default: :obj:`None`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: (LongTensor, Tensor, BoolTensor)
    """
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    (edge_index, edge_attr,
     loop_edge_index, loop_edge_attr) = segregate_self_loops(edge_index,
                                                             edge_attr)

    # A node survives if any non-self-loop edge touches it.
    mask = torch.zeros(num_nodes, dtype=torch.bool, device=edge_index.device)
    mask[edge_index.view(-1)] = True

    # Relabelling table: old id -> new consecutive id, -1 if removed.
    assoc = torch.full((num_nodes, ), -1, dtype=torch.long,
                       device=mask.device)
    kept_count = mask.sum()
    assoc[mask] = torch.arange(kept_count, device=assoc.device)
    edge_index = assoc[edge_index]

    # Nodes that both own a self-loop and survive the filtering.
    loop_nodes = loop_edge_index[0]
    has_loop = torch.zeros_like(mask)
    has_loop[loop_nodes] = True
    loop_mask = has_loop & mask

    # Node id -> column of its self-loop inside `loop_edge_index`.
    loop_assoc = torch.full_like(assoc, -1)
    loop_columns = torch.arange(loop_edge_index.size(1),
                                device=loop_assoc.device)
    loop_assoc[loop_nodes] = loop_columns
    loop_idx = loop_assoc[loop_mask]
    kept_loops = assoc[loop_edge_index[:, loop_idx]]

    edge_index = torch.cat([edge_index, kept_loops], dim=1)

    if edge_attr is not None:
        edge_attr = torch.cat([edge_attr, loop_edge_attr[loop_idx]], dim=0)

    return edge_index, edge_attr, mask
Example #6
0
    def __init__(self, args, edge_index, num_nodes, size, num_hops, batch_size=1, shuffle=False,
                 drop_last=False, bipartite=True, add_self_loops=False,
                 flow='source_to_target'):
        """Store the sampler configuration, pre-sort the edge list and load
        (or sample) positive and negative meta-path node pairs.

        Args:
            args: Namespace-like object; ``model_name``, ``selarhint``,
                ``n_meta``, ``metapath`` and ``dataset`` are read.
            edge_index: 2-D tensor indexed as ``[:2, :]`` for the edge
                endpoints and ``[2, :]`` for a per-edge type row.
            num_nodes: Number of nodes; sizes the per-node buffers.
            size, num_hops, batch_size, shuffle, drop_last: Stored as given.
                ``batch_size`` also sets how many meta-path pairs are loaded.
            bipartite, add_self_loops: When both are truthy, self-loops are
                split off from the edge list into separate attributes.
            flow: ``'source_to_target'`` or ``'target_to_source'``; selects
                which endpoint row the edges are sorted by.

        Raises:
            ValueError: If ``args.dataset`` contains neither ``'music'`` nor
                ``'book'`` (previously this surfaced later as an unbound
                local variable error).
        """
        self.model_name = args.model_name
        self.use_hint = args.selarhint
        self.n_meta = args.n_meta
        self.metapath = args.metapath
        self.dataset = args.dataset
        self.edge_index = edge_index[:2, :]
        self.edge_type = edge_index[2, :]
        self.num_nodes = num_nodes
        self.size = size
        self.num_hops = num_hops
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.bipartite = bipartite
        self.add_self_loops = add_self_loops
        self.flow = flow
        self.e_id = torch.arange(self.edge_index.size(1))

        if bipartite and add_self_loops:
            tmp = segregate_self_loops(self.edge_index, self.e_id)
            self.edge_index, self.e_id, self.edge_index_loop = tmp[:3]
            # Per-node lookup of the self-loop edge id; -1 marks "no loop".
            self.e_id_loop = self.e_id.new_full((self.num_nodes,), -1)
            self.e_id_loop[tmp[2][0]] = tmp[3]
            # `e_id` now holds the original ids of the edges that were kept;
            # filter the types the same way so that the `e_assoc` reordering
            # below stays aligned with the filtered edge list.
            self.edge_type = self.edge_type[self.e_id]

        assert flow in ['source_to_target', 'target_to_source']
        self.i, self.j = (0, 1) if flow == 'target_to_source' else (1, 0)

        # Sort edges by endpoint row `i`; `e_assoc` is the permutation.
        edge_index_i, self.e_assoc = self.edge_index[self.i].sort()
        self.edge_index_j = self.edge_index[self.j, self.e_assoc]
        self.edge_type = self.edge_type[self.e_assoc]
        deg = degree(edge_index_i, self.num_nodes, dtype=torch.long)
        # Prefix sums over the per-node counts, with a leading zero.
        self.cumdeg = torch.cat([deg.new_zeros(1), deg.cumsum(0)])
        self.tmp = torch.empty(self.num_nodes, dtype=torch.long)

        if 'music' in args.dataset:
            num_iter = 199
        elif 'book' in args.dataset:
            num_iter = 82
        else:
            raise ValueError(
                "unsupported dataset {!r}: expected a name containing "
                "'music' or 'book'".format(args.dataset))

        # Loading stops once this many pairs have been collected.
        min_pairs = self.batch_size * num_iter

        def load_pairs(sign, meta_id):
            """Concatenate up to ten ``<sign>_meta<id>_<k>.pickle`` shards."""
            pairs = None
            for shard in range(10):
                path = '../data/{}/meta_labels/{}_meta{}_{}.pickle'.format(
                    self.dataset, sign, meta_id, shard)
                # NOTE(review): pickle executes arbitrary code on load; these
                # files must come from a trusted source.
                with open(path, 'rb') as f:
                    chunk = pickle.load(f).T
                pairs = chunk if pairs is None else torch.cat((pairs, chunk), 1)
                if pairs.size(1) >= min_pairs:
                    break
            return pairs

        self.meta1, self.meta2, self.n_meta1, self.n_meta2 = [], [], [], []
        for meta_id in self.metapath:
            pairs = load_pairs('pos', meta_id)
            self.meta1.append(pairs[0, :])
            self.meta2.append(pairs[1, :])

        if args.dataset != 'book':
            # Negative pairs are read from disk, like the positive ones.
            for meta_id in self.metapath:
                pairs = load_pairs('neg', meta_id)
                self.n_meta1.append(pairs[0, :])
                self.n_meta2.append(pairs[1, :])
        else:
            # No negative files are read for 'book': draw random pairs from
            # the observed endpoints and reject positives, duplicates and
            # pairs whose two ids coincide.
            for i in range(self.n_meta):
                pos_u, pos_v = self.meta1[i], self.meta2[i]
                n_pos = len(pos_u)
                u_unique = torch.unique(pos_u)
                i_unique = torch.unique(pos_v)

                # Starts as the positive pairs, then grows with each accepted
                # negative so that duplicates are rejected as well.
                taken = set(zip(pos_u.tolist(), pos_v.tolist()))
                # Oversample 10x so rejection still leaves enough candidates;
                # the order of the two draws fixes the RNG stream.
                u_sample = np.random.choice(u_unique.tolist(), size=n_pos * 10, replace=True)
                i_sample = np.random.choice(i_unique.tolist(), size=n_pos * 10, replace=True)

                sampled_ind = []
                candidates = zip(u_sample.tolist(), i_sample.tolist())
                for k, (node1, node2) in enumerate(candidates):
                    if node1 != node2 and (node1, node2) not in taken:
                        taken.add((node1, node2))
                        sampled_ind.append(k)
                    if len(sampled_ind) == n_pos:
                        break
                self.n_meta1.append(torch.from_numpy(u_sample[sampled_ind]))
                self.n_meta2.append(torch.from_numpy(i_sample[sampled_ind]))