def test_segregate_self_loops():
    """segregate_self_loops splits loop edges from plain edges, with and
    without edge attributes."""
    edge_index = torch.tensor([[0, 0, 1], [0, 1, 0]])

    # Without attributes: both attribute slots of the result are None.
    rest_index, rest_attr, loop_index, loop_attr = segregate_self_loops(edge_index)
    assert rest_index.tolist() == [[0, 1], [1, 0]]
    assert rest_attr is None
    assert loop_index.tolist() == [[0], [0]]
    assert loop_attr is None

    # With attributes: each attribute follows its edge into the right split.
    edge_attr = torch.tensor([1, 2, 3])
    rest_index, rest_attr, loop_index, loop_attr = segregate_self_loops(
        edge_index, edge_attr)
    assert rest_index.tolist() == [[0, 1], [1, 0]]
    assert rest_attr.tolist() == [2, 3]
    assert loop_index.tolist() == [[0], [0]]
    assert loop_attr.tolist() == [1]
def __init__(self, args, edge_index, num_nodes, size, num_hops, batch_size=1,
             shuffle=False, drop_last=False, bipartite=True,
             add_self_loops=False, flow='source_to_target'):
    """Set up sampler state: split the edge-type row off `edge_index`,
    optionally segregate self-loops, then sort edges by the gather node and
    build a cumulative-degree index for per-node neighbor lookup.

    Args:
        args: experiment namespace; only `model_name` and `selarhint` are read.
        edge_index: LongTensor whose rows 0-1 are the edges and row 2 the
            relation/edge type.
        num_nodes: number of nodes in the graph.
        size, num_hops, batch_size, shuffle, drop_last, bipartite,
        add_self_loops, flow: sampling configuration, stored as-is.
    """
    self.model_name = args.model_name
    self.use_hint = args.selarhint

    # Row 2 carries the edge type; only rows 0-1 form the adjacency.
    self.edge_index = edge_index[:2, :]
    self.edge_type = edge_index[2, :]

    self.num_nodes = num_nodes
    self.size = size
    self.num_hops = num_hops
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.drop_last = drop_last
    self.bipartite = bipartite
    self.add_self_loops = add_self_loops
    self.flow = flow

    # One id per edge so sampled edges can be traced back to the input.
    self.e_id = torch.arange(self.edge_index.size(1))

    if bipartite and add_self_loops:
        seg = segregate_self_loops(self.edge_index, self.e_id)
        self.edge_index, self.e_id, self.edge_index_loop = seg[0], seg[1], seg[2]
        # Map each node to its self-loop's edge id (-1 if it has none).
        self.e_id_loop = self.e_id.new_full((self.num_nodes, ), -1)
        self.e_id_loop[seg[2][0]] = seg[3]

    assert flow in ['source_to_target', 'target_to_source']
    if flow == 'target_to_source':
        self.i, self.j = 0, 1
    else:
        self.i, self.j = 1, 0

    # Sort edges by the "central" node; e_assoc maps sorted -> original order.
    edge_index_i, self.e_assoc = self.edge_index[self.i].sort()
    self.edge_index_j = self.edge_index[self.j, self.e_assoc]

    # Cumulative degree gives each node's contiguous slice of sorted edges.
    deg = degree(edge_index_i, self.num_nodes, dtype=torch.long)
    self.cumdeg = torch.cat([deg.new_zeros(1), deg.cumsum(0)])
    self.tmp = torch.empty(self.num_nodes, dtype=torch.long)
def __init__(self, data, size, num_hops, batch_size=1, shuffle=False,
             drop_last=False, bipartite=True, add_self_loops=False,
             flow='source_to_target'):
    """Set up neighbor-sampler state from a graph `data` object: optionally
    segregate self-loops, then sort edges by the gather node and build a
    cumulative-degree index for per-node neighbor lookup.

    Raises:
        ImportError: if the optional `torch-cluster` sampler backend is
            unavailable.
    """
    if neighbor_sampler is None:
        raise ImportError('`NeighborSampler` requires `torch-cluster`.')

    self.data = data
    # `repeat` presumably broadcasts a scalar size to one entry per hop.
    self.size = repeat(size, num_hops)
    self.num_hops = num_hops
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.drop_last = drop_last
    self.bipartite = bipartite
    self.add_self_loops = add_self_loops
    self.flow = flow

    self.edge_index = data.edge_index
    # One id per edge so sampled edges can be traced back to the input.
    self.e_id = torch.arange(self.edge_index.size(1))

    if bipartite and add_self_loops:
        seg = segregate_self_loops(self.edge_index, self.e_id)
        self.edge_index, self.e_id, self.edge_index_loop = seg[0], seg[1], seg[2]
        # Map each node to its self-loop's edge id (-1 if it has none).
        self.e_id_loop = self.e_id.new_full((data.num_nodes, ), -1)
        self.e_id_loop[seg[2][0]] = seg[3]

    assert flow in ['source_to_target', 'target_to_source']
    if flow == 'target_to_source':
        self.i, self.j = 0, 1
    else:
        self.i, self.j = 1, 0

    # Sort edges by the "central" node; e_assoc maps sorted -> original order.
    edge_index_i, self.e_assoc = self.edge_index[self.i].sort()
    self.edge_index_j = self.edge_index[self.j, self.e_assoc]

    # Cumulative degree gives each node's contiguous slice of sorted edges.
    deg = degree(edge_index_i, data.num_nodes, dtype=torch.long)
    self.cumdeg = torch.cat([deg.new_zeros(1), deg.cumsum(0)])
    self.tmp = torch.empty(data.num_nodes, dtype=torch.long)
def remove_isolated_nodes(edge_index, edge_attr=None, num_nodes=None):
    """Remove isolated nodes from the graph given by `edge_index` (optionally
    with edge attributes `edge_attr`), relabeling the remaining nodes to a
    compact range. Self-loops are preserved for non-isolated nodes; nodes
    whose only edges are self-loops count as isolated.

    Args:
        edge_index (LongTensor): The edge indices.
        edge_attr (Tensor, optional): Edge weights or multi-dimensional edge
            features. (default: None)
        num_nodes (int, optional): The number of nodes. (default: None)

    Returns:
        (LongTensor, Tensor, BoolTensor): relabeled edge indices, filtered
        edge attributes (or None), and a [num_nodes] keep-mask for filtering
        node features.
    """
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    # Split self-loops off first so they do not make an otherwise isolated
    # node count as connected.
    out = segregate_self_loops(edge_index, edge_attr)
    edge_index, edge_attr, loop_edge_index, loop_edge_attr = out

    mask = torch.zeros(num_nodes, dtype=torch.bool, device=edge_index.device)
    mask[edge_index.view(-1)] = True

    # Dense relabeling table: old index -> new compact index (-1 if dropped).
    assoc = torch.full((num_nodes, ), -1, dtype=torch.long, device=mask.device)
    # A bool tensor sums directly to an integer count; no explicit long cast
    # is needed.
    assoc[mask] = torch.arange(mask.sum(), device=assoc.device)
    edge_index = assoc[edge_index]

    # Re-attach only the self-loops that sit on kept nodes. Everything stays
    # bool here: indexing with uint8 masks is deprecated in PyTorch, so the
    # previous uint8 round-trip is dropped (behavior is unchanged).
    loop_mask = torch.zeros_like(mask)
    loop_mask[loop_edge_index[0]] = True
    loop_mask = loop_mask & mask
    loop_assoc = torch.full_like(assoc, -1)
    loop_assoc[loop_edge_index[0]] = torch.arange(loop_edge_index.size(1),
                                                  device=loop_assoc.device)
    loop_idx = loop_assoc[loop_mask]
    loop_edge_index = assoc[loop_edge_index[:, loop_idx]]
    edge_index = torch.cat([edge_index, loop_edge_index], dim=1)

    if edge_attr is not None:
        loop_edge_attr = loop_edge_attr[loop_idx]
        edge_attr = torch.cat([edge_attr, loop_edge_attr], dim=0)

    return edge_index, edge_attr, mask
def remove_isolated_nodes(edge_index, edge_attr=None, num_nodes=None):
    r"""Removes the isolated nodes from the graph given by :attr:`edge_index`
    with optional edge attributes :attr:`edge_attr`.
    In addition, returns a mask of shape :obj:`[num_nodes]` to manually filter
    out isolated node features later on.
    Self-loops are preserved for non-isolated nodes.

    Args:
        edge_index (LongTensor): The edge indices.
        edge_attr (Tensor, optional): Edge weights or multi-dimensional
            edge features. (default: :obj:`None`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: (LongTensor, Tensor, BoolTensor)
    """
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    # Split self-loops off so they do not make an otherwise isolated node
    # look connected.
    edge_index, edge_attr, loop_index, loop_attr = segregate_self_loops(
        edge_index, edge_attr)

    # A node is kept iff it appears in at least one non-loop edge.
    keep = torch.zeros(num_nodes, dtype=torch.bool, device=edge_index.device)
    keep[edge_index.view(-1)] = True

    # Dense relabeling table: old index -> new compact index (-1 if dropped).
    relabel = torch.full((num_nodes, ), -1, dtype=torch.long,
                         device=keep.device)
    relabel[keep] = torch.arange(keep.sum(), device=relabel.device)
    edge_index = relabel[edge_index]

    # Re-attach only the self-loops that sit on kept nodes.
    has_loop = torch.zeros_like(keep)
    has_loop[loop_index[0]] = True
    has_loop &= keep
    loop_pos = torch.full_like(relabel, -1)
    loop_pos[loop_index[0]] = torch.arange(loop_index.size(1),
                                           device=loop_pos.device)
    sel = loop_pos[has_loop]
    edge_index = torch.cat([edge_index, relabel[loop_index[:, sel]]], dim=1)

    if edge_attr is not None:
        edge_attr = torch.cat([edge_attr, loop_attr[sel]], dim=0)

    return edge_index, edge_attr, keep
def __init__(self, args, edge_index, num_nodes, size, num_hops, batch_size=1, shuffle=False, drop_last=False, bipartite=True, add_self_loops=False, flow='source_to_target'):
    # Neighbor-sampler setup plus loading of metapath (positive/negative
    # node-pair) labels for SELAR-style auxiliary tasks.
    #
    # Args:
    #     args: experiment namespace; reads model_name, selarhint, n_meta,
    #         metapath, dataset.
    #     edge_index: LongTensor whose rows 0-1 are the edges and row 2 the
    #         relation/edge type.
    #     num_nodes: number of nodes in the graph.
    #     size, num_hops, batch_size, shuffle, drop_last, bipartite,
    #     add_self_loops, flow: sampling configuration, stored verbatim.
    self.model_name = args.model_name
    self.use_hint = args.selarhint
    self.n_meta = args.n_meta
    self.metapath = args.metapath
    self.dataset = args.dataset
    # Row 2 carries the edge type; only rows 0-1 form the adjacency.
    self.edge_index = edge_index[:2, :]
    self.edge_type = edge_index[2,:]
    self.num_nodes = num_nodes
    self.size = size
    self.num_hops = num_hops
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.drop_last = drop_last
    self.bipartite = bipartite
    self.add_self_loops = add_self_loops
    self.flow = flow
    # One id per edge so sampled edges can be traced back to the input.
    self.e_id = torch.arange(self.edge_index.size(1))
    if bipartite and add_self_loops:
        # NOTE(review): edge_type is NOT re-segregated here, so if the graph
        # contains self-loops, edge_type no longer lines up with the reduced
        # edge_index — confirm inputs are loop-free in this configuration.
        tmp = segregate_self_loops(self.edge_index, self.e_id)
        self.edge_index, self.e_id, self.edge_index_loop = tmp[:3]
        # Map each node to its self-loop's edge id (-1 if it has none).
        self.e_id_loop = self.e_id.new_full((self.num_nodes,), -1)
        self.e_id_loop[tmp[2][0]] = tmp[3]
    assert flow in ['source_to_target', 'target_to_source']
    self.i, self.j = (0, 1) if flow == 'target_to_source' else (1, 0)
    # Sort edges by the "central" node; e_assoc maps sorted -> original order.
    edge_index_i, self.e_assoc = self.edge_index[self.i].sort()
    self.edge_index_j = self.edge_index[self.j, self.e_assoc]
    # Reorder edge types to follow the sorted edge order.
    self.edge_type = self.edge_type[self.e_assoc]
    # Cumulative degree gives each node's contiguous slice of sorted edges.
    deg = degree(edge_index_i, self.num_nodes, dtype=torch.long)
    self.cumdeg = torch.cat([deg.new_zeros(1), deg.cumsum(0)])
    self.tmp = torch.empty(self.num_nodes, dtype=torch.long)
    # num_iter caps how many metapath pairs are loaded (batch_size * num_iter).
    # NOTE(review): if args.dataset contains neither 'music' nor 'book',
    # num_iter is never assigned and the loop below raises NameError —
    # confirm only these two dataset families are supported.
    if 'music' in args.dataset:
        num_iter = 199
    elif 'book' in args.dataset:
        num_iter = 82
    # meta1/meta2: endpoints of positive metapath pairs, one tensor per
    # metapath; n_meta1/n_meta2: the corresponding negatives.
    self.meta1, self.meta2, self.n_meta1, self.n_meta2 = [],[],[],[]
    for i in self.metapath:
        # Positive pairs are sharded over files pos_meta{i}_{j}.pickle;
        # concatenate shards until enough pairs are collected.
        # NOTE(review): idx allows 10000 shards but the `j == 9` break caps
        # reading at 10 files — presumably that is the shard count on disk.
        # SECURITY: pickle.load runs arbitrary code; only use trusted files.
        idx = list(range(10000))
        for n, j in enumerate(idx):
            with open('../data/{}/meta_labels/pos_meta{}_{}.pickle'.format(self.dataset, i, j), 'rb') as f:
                meta = pickle.load(f).T
            if n == 0:
                metapath = meta
            else:
                metapath = torch.cat((metapath, meta), 1)
            if (metapath.size(1) >= self.batch_size*num_iter) or (j == 9):
                break
        self.meta1.append(metapath[0,:])
        self.meta2.append(metapath[1,:])
    if args.dataset != 'book':
        # Negatives are pre-computed on disk for non-book datasets.
        for i in self.metapath:
            idx = list(range(10000))
            for n, j in enumerate(idx):
                with open('../data/{}/meta_labels/neg_meta{}_{}.pickle'.format(self.dataset, i, j), 'rb') as f:
                    meta = pickle.load(f).T
                if n == 0:
                    metapath = meta
                else:
                    metapath = torch.cat((metapath, meta), 1)
                if (metapath.size(1) >= self.batch_size*num_iter) or (j ==9):
                    break
            self.n_meta1.append(metapath[0,:])
            self.n_meta2.append(metapath[1,:])
    else:
        # For 'book', negatives are sampled on the fly: draw random
        # (u, i) pairs from the nodes seen in positives and keep those that
        # are neither positive edges, duplicates, nor self-pairs, until one
        # negative exists per positive (oversampling by 10x).
        neg = []
        for i in range(self.n_meta):
            u_unique = torch.unique(self.meta1[i])
            i_unique = torch.unique(self.meta2[i])
            # String-encoded edge set "u,i" for O(1) membership tests.
            edge_set = set([str(self.meta1[i][j].item()) + "," + str(self.meta2[i][j].item()) for j in range(len(self.meta1[i]))])
            u_sample = np.random.choice(u_unique.tolist(), size=len(self.meta1[i])*10, replace=True)
            i_sample = np.random.choice(i_unique.tolist(), size=len(self.meta1[i])*10, replace=True)
            sampled_edge_set = set([])
            sampled_ind = []
            for k in range(len(self.meta1[i])*10):
                node1 = u_sample[k].item()
                node2 = i_sample[k].item()
                edge_str = str(node1) +","+ str(node2)
                if not edge_str in edge_set and not edge_str in sampled_edge_set and not node1 == node2:
                    sampled_edge_set.add(edge_str)
                    sampled_ind.append(k)
                if len(sampled_ind) == len(self.meta1[i]):
                    break
            self.n_meta1.append(torch.from_numpy(u_sample[sampled_ind]))
            self.n_meta2.append(torch.from_numpy(i_sample[sampled_ind]))
        del sampled_edge_set
    # Free the large concatenated buffers.
    # NOTE(review): raises NameError if self.metapath is empty (metapath/meta
    # never assigned) — confirm at least one metapath is always configured.
    del metapath
    del meta