Example no. 1
def degree_matrix(adj: SparseTensor, indeg=True):
    N = adj.size(-1)
    # In-degrees sum over dim 0 (incoming edges); out-degrees over dim 1.
    deg = adj.sum(0) if indeg else adj.sum(1)
    row = col = torch.arange(N, device=adj.device())
    return SparseTensor(
        row=row, col=col, value=deg, sparse_sizes=(N, N), is_sorted=True
    )
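A minimal usage sketch (hedged: the toy graph is made up, and it assumes torch and torch_sparse are importable alongside degree_matrix above):

import torch
from torch_sparse import SparseTensor

# Toy directed graph: 0 -> 1, 1 -> 0, 1 -> 2, 2 -> 1
row = torch.tensor([0, 1, 1, 2])
col = torch.tensor([1, 0, 2, 1])
adj = SparseTensor(row=row, col=col, sparse_sizes=(3, 3))

D_in = degree_matrix(adj, indeg=True)    # diagonal matrix of in-degrees
D_out = degree_matrix(adj, indeg=False)  # diagonal matrix of out-degrees
print(D_in.to_dense())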
Example no. 2
def test_get_data_size():
    x = torch.randn(10, 128)
    row, col = torch.randint(0, 10, (2, 100), dtype=torch.long)
    adj_t = SparseTensor(row=row, col=col, value=None, sparse_sizes=(10, 10))
    data = Data(x=x, y=x, adj_t=adj_t)

    data_size = get_data_size(data)
    # x: 10 * 128 float32 values (4 bytes each); adj_t stores 11 rowptr and
    # 100 col entries as int64 (8 bytes); y aliases x, so it is not re-counted.
    assert data_size == 10 * 128 * 4 + 11 * 8 + 100 * 8
Example no. 3
def radius(x, r=0.5, loop=False, dtype=None, device=None):
    N = x.size(0)
    batch = torch.zeros(N, dtype=torch.long, device=x.device)
    edge_index = radius_graph(x, r, batch=batch, loop=loop).to(device)
    edge_val = torch.ones(edge_index.shape[-1], dtype=dtype, device=device)
    return SparseTensor(
        row=edge_index[0], col=edge_index[1], value=edge_val, sparse_sizes=(N, N)
    )
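A quick, hedged usage sketch (assumes radius_graph from torch_cluster is importable, since radius above depends on it; the point cloud is made up):

import torch
from torch_cluster import radius_graph  # dependency of radius() above

pts = torch.rand(50, 3)        # 50 random points in the unit cube
adj = radius(pts, r=0.25)      # sparse adjacency of the resulting radius graph
print(adj.sparse_sizes(), adj.nnz())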
Example no. 4
    def setup_geom(self):
        edge_file = self.root / self.name / 'out1_graph_edges.txt'
        feature_label_file = self.root / self.name / 'out1_node_feature_label.txt'

        self.metric = 'Accuracy'

        with edge_file.open('r') as f:
            edges = f.readlines()[1:]
        edges = torch.LongTensor([
            [int(v) for v in edge.strip().split('\t')]
            for edge in edges
        ])
        self.num_nodes = torch.max(edges).item() + 1
        self.adj_t = SparseTensor(row=edges[:, 0],
                                  col=edges[:, 1],
                                  sparse_sizes=(self.num_nodes,
                                                self.num_nodes))
        # self.adj_t = self.adj_t.to_symmetric()

        if self.make_edge_index:
            self.edge_index = edges.t()

        idx = []
        x = []
        y = []
        with feature_label_file.open('r') as f:
            xy = f.readlines()[1:]
        for line in xy:
            node_id, feature, label = line.strip().split('\t')
            idx.append(int(node_id))

            if self.name == 'actor':
                one_hot = torch.zeros(932)
                pos_with_ones = list(map(int, feature.split(',')))
                one_hot[pos_with_ones] = 1
                x.append(one_hot.int().tolist())
            else:
                x.append(list(map(int, feature.split(','))))
            y.append(int(label))

        _, indices = torch.sort(torch.LongTensor(idx))
        self.x = torch.LongTensor(x)[indices]
        self.y = torch.LongTensor(y).view(-1, 1)[indices]
        self.num_classes = torch.max(self.y).item() + 1

        idx = torch.arange(self.y.shape[0]).view(-1, 1)
        train_idx, val_test_idx = train_test_split(idx,
                                                   test_size=0.4,
                                                   stratify=self.y)
        val_idx, test_idx = train_test_split(
            val_test_idx,
            test_size=0.5,
            stratify=self.y[val_test_idx.squeeze()])
        self.split_idx = {
            'train': train_idx.view(-1),
            'valid': val_idx.view(-1),
            'test': test_idx.view(-1)
        }

        self.criterion = torch.nn.CrossEntropyLoss()
Example no. 5
    def __init__(self,
                 data,
                 batch_size,
                 num_steps=1,
                 sample_coverage=50,
                 save_dir=None,
                 num_workers=0,
                 log=True):
        assert data.edge_index is not None
        assert 'node_norm' not in data
        assert 'edge_norm' not in data

        self.N = N = data.num_nodes
        self.E = data.num_edges

        self.adj = SparseTensor(row=data.edge_index[0],
                                col=data.edge_index[1],
                                value=data.edge_attr,
                                sparse_sizes=(N, N))

        self.data = copy.copy(data)
        self.data.edge_index = None
        self.data.edge_attr = None

        self.batch_size = batch_size
        self.num_steps = num_steps
        self.sample_coverage = sample_coverage
        self.num_workers = num_workers
        self.log = log
        self.__count__ = 0

        if self.num_workers > 0:
            self.__sample_queue__ = Queue()
            self.__sample_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_sample__,
                                 args=(self.__sample_queue__, ))
                worker.daemon = True
                worker.start()
                self.__sample_workers__.append(worker)

        path = osp.join(save_dir or '', self.__filename__)
        if save_dir is not None and osp.exists(path):  # pragma: no cover
            self.node_norm, self.edge_norm = torch.load(path)
        else:
            self.node_norm, self.edge_norm = self.__compute_norm__()
            if save_dir is not None:  # pragma: no cover
                torch.save((self.node_norm, self.edge_norm), path)

        if self.num_workers > 0:
            self.__data_queue__ = Queue()
            self.__data_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_data__,
                                 args=(self.__data_queue__, ))
                worker.daemon = True
                worker.start()
                self.__data_workers__.append(worker)
Example no. 6
def test_nn_conv():
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.rand(row.size(0), 3)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    nn = Seq(Lin(3, 32), ReLU(), Lin(32, 8 * 32))
    conv = NNConv(8, 32, nn=nn)
    assert conv.__repr__() == (
        'NNConv(8, 32, aggr="add", nn=Sequential(\n'
        '  (0): Linear(in_features=3, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=256, bias=True)\n'
        '))')
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()

    t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, edge_index, value).tolist() == out.tolist()
    assert jit(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()

    t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, adj.t()).tolist() == out.tolist()

    adj = adj.sparse_resize((4, 2))
    conv = NNConv((8, 16), 32, nn=nn)
    assert conv.__repr__() == (
        'NNConv((8, 16), 32, aggr="add", nn=Sequential(\n'
        '  (0): Linear(in_features=3, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=256, bias=True)\n'
        '))')
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()

    t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
    assert jit((x1, x2), edge_index, value,
               size=(4, 2)).tolist() == out1.tolist()
    assert jit((x1, None), edge_index, value,
               size=(4, 2)).tolist() == out2.tolist()

    t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()
Example no. 7
def preprocess(data,
               preprocess="diffusion",
               num_propagations=10,
               p=None,
               alpha=None,
               use_cache=True,
               post_fix=""):
    if use_cache:
        try:
            x = torch.load(f'embeddings/{preprocess}{post_fix}.pt')
            print('Using cache')
            return x
        except Exception:
            print(
                f'embeddings/{preprocess}{post_fix}.pt not found or not '
                f'enough iterations! Regenerating it now'
            )
            # Create an empty placeholder file
            with open(f'embeddings/{preprocess}{post_fix}.pt', 'w') as fp:
                pass

    if preprocess == "community":
        return community(data, post_fix)

    if preprocess == "spectral":
        return spectral(data, post_fix)

    print('Computing adj...')
    N = data.num_nodes
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    row, col = data.edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    adj = adj.set_diag()
    deg = adj.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    adj = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)

    adj = adj.to_scipy(layout='csr')

    print(f'Start {preprocess} processing')

    if preprocess == "sgc":
        result = sgc(data.x.numpy(), adj, num_propagations)
#     if preprocess == "lp":
#         result = lp(adj, data.y.data, num_propagations, p = p, alpha = alpha, preprocess = preprocess)
    if preprocess == "diffusion":
        result = diffusion(data.x.numpy(),
                           adj,
                           num_propagations,
                           p=p,
                           alpha=alpha)

    torch.save(result, f'embeddings/{preprocess}{post_fix}.pt')

    return result
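A hedged pre-flight note: preprocess() assumes an embeddings/ directory already exists; creating it up front (not part of the original snippet) keeps the open() and torch.save() calls from failing:

import os
os.makedirs('embeddings', exist_ok=True)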
Example no. 8
def from_cpx(mat):
    # Convert a CuPy CSR matrix to a SparseTensor via DLPack; the value buffer
    # is shared, while the index arrays are cast to int64.
    wgt = from_dlpack(mat.data.toDlpack())
    rowptr = from_dlpack(mat.indptr.toDlpack()).to(torch.long)
    col = from_dlpack(mat.indices.toDlpack()).to(torch.long)
    return SparseTensor(rowptr=rowptr,
                        col=col,
                        value=wgt,
                        sparse_sizes=mat.shape,
                        is_sorted=True)
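A hedged round-trip sketch (assumes CuPy and a CUDA device are available, with torch and torch_sparse in scope for from_cpx above; cupyx.scipy.sparse.random is just one way to obtain a CSR matrix):

import cupyx.scipy.sparse as cpsp
from torch.utils.dlpack import from_dlpack  # dependency of from_cpx() above

mat = cpsp.random(100, 100, density=0.05, format='csr', dtype='float32')
adj = from_cpx(mat)  # shares the value buffer with the CuPy matrix
print(adj.sparse_sizes())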
Example no. 9
def test_my_default_arg_conv():
    x = torch.randn(4, 1)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = MyDefaultArgConv()
    assert conv(x, edge_index).view(-1).tolist() == [0, 0, 0, 0]
    assert conv(x, adj.t()).view(-1).tolist() == [0, 0, 0, 0]
Example no. 10
def test_my_conv():
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.randn(row.size(0))
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    conv = MyConv(8, 32)
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, (4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()
    conv.fuse = False
    assert conv(x1, adj.t()).tolist() == out.tolist()
    conv.fuse = True

    t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, edge_index, value).tolist() == out.tolist()
    assert jit(x1, edge_index, value, (4, 4)).tolist() == out.tolist()

    t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, adj.t()).tolist() == out.tolist()
    jit.fuse = False
    assert jit(x1, adj.t()).tolist() == out.tolist()
    jit.fuse = True

    adj = adj.sparse_resize((4, 2))
    conv = MyConv((8, 16), 32)
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()
    conv.fuse = False
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()
    conv.fuse = True

    t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
    assert jit((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert jit((x1, None), edge_index, value, (4, 2)).tolist() == out2.tolist()

    t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()
    jit.fuse = False
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()
    jit.fuse = True
Example no. 11
    def prune(self):
        self.mask = self.pruner.prune(self.edge_index)
        self.adj = SparseTensor(
            row=self.edge_index[0, self.mask],
            col=self.edge_index[1, self.mask],
            value=torch.arange(self.E, device=self.edge_index.device)[self.mask],
            sparse_sizes=(self.N, self.N))
Example no. 12
    def build_batch(data: torch_geometric.data.Batch,
                    id2graphlet: Dict[int, Subgraph],
                    common_file=None) -> "Batch":
        # Check if graphlet_id == 0 exists in x because of how remap works
        graphlet_id_zero = (data.x == 0).any().item()

        # remap graphlet_ids to (0..len(data.x.unique()))
        remapped_graphlet_ids, mapping = renumber(
            data.x.numpy(),
            start=0 + int(graphlet_id_zero),
            in_place=False,
            preserve_zero=graphlet_id_zero)

        batch_graphlet_indices = torch.tensor(remapped_graphlet_ids.flatten(),
                                              dtype=torch.int64)

        # graphlet ids (keys) ordered by the new id (value) they were mapped to
        graphlet_ids_sorted_by_new_id = [
            key for key, _ in sorted(mapping.items(), key=lambda e: e[1])
        ]

        xs = []
        edge_indices = []

        # create list of xs and edge_indices where
        # xs[i] are the features of the graphlet that was mapped to
        # new_id == i etc.
        for i, graphlet_id in enumerate(graphlet_ids_sorted_by_new_id):
            graphlet = id2graphlet[graphlet_id]
            xs.append(graphlet.x)
            edge_indices.append(graphlet.edge_index + i * graphlet.x.size(0))

        if common_file is not None:
            common = np.loadtxt(str(common_file), dtype=np.int64)
            common = torch.tensor(
                list(map(lambda x: mapping.get(x, -100), common)))
            common = (batch_graphlet_indices == common.reshape(-1, 1)).any(0)
            data.estimates[~common] = 0

        # Sparse matrix where each row represents a graph
        # and each column a graphlet where
        # m[graph][graphlet] == count of graphlet in graph
        graph_has_graphlet = SparseTensor(row=data.batch,
                                          col=batch_graphlet_indices,
                                          value=data.estimates)

        # FIXME: update the density threshold if necessary
        if graph_has_graphlet.density() > 0.75:
            graph_has_graphlet = graph_has_graphlet.to_dense()

        return Batch(x=torch.cat(xs, dim=0),
                     edge_index=torch.cat(edge_indices, dim=1),
                     graph_has_graphlet=graph_has_graphlet,
                     graphlet_ids=graphlet_ids_sorted_by_new_id,
                     y=data.y)
Example no. 13
def dgl_to_pyg_graph(g):
    eidx = g.edges()
    N = g.number_of_nodes()
    E = g.number_of_edges()
    adj_t = SparseTensor(row=eidx[0],
                         col=eidx[1],
                         value=th.ones(E).float(),
                         sparse_sizes=(N, N)).t()
    return eidx, adj_t
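A hedged usage sketch (assumes DGL is installed and that the snippet's th alias refers to torch):

import dgl
import torch as th

g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])), num_nodes=3)
eidx, adj_t = dgl_to_pyg_graph(g)
print(adj_t.sparse_sizes())  # (3, 3)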
Example no. 14
def test_gmm_conv(separate_gaussians):
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.rand(row.size(0), 3)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    conv = GMMConv(8,
                   32,
                   dim=3,
                   kernel_size=25,
                   separate_gaussians=separate_gaussians)
    assert conv.__repr__() == 'GMMConv(8, 32, dim=3)'
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, edge_index, value, size=(4, 4)), out)
    assert torch.allclose(conv(x1, adj.t()), out)

    if is_full_test():
        t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, edge_index, value), out)
        assert torch.allclose(jit(x1, edge_index, value, size=(4, 4)), out)

        t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, adj.t()), out)

    adj = adj.sparse_resize((4, 2))
    conv = GMMConv((8, 16),
                   32,
                   dim=3,
                   kernel_size=5,
                   separate_gaussians=separate_gaussians)
    assert conv.__repr__() == 'GMMConv((8, 16), 32, dim=3)'
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert torch.allclose(conv((x1, x2), edge_index, value, (4, 2)), out1)
    assert torch.allclose(conv((x1, x2), adj.t()), out1)
    assert torch.allclose(conv((x1, None), adj.t()), out2)

    if is_full_test():
        t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), edge_index, value), out1)
        assert torch.allclose(jit((x1, x2), edge_index, value, size=(4, 2)),
                              out1)
        assert torch.allclose(jit((x1, None), edge_index, value, size=(4, 2)),
                              out2)

        t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), adj.t()), out1)
        assert torch.allclose(jit((x1, None), adj.t()), out2)
Example no. 15
def drop_edges(mat, p=0.3):
    # Keep each edge independently with probability 1 - p.
    mask = torch.rand((mat.storage.row().shape[0], )) > p
    matr = SparseTensor(
        row=mat.storage.row()[mask],
        col=mat.storage.col()[mask],
        value=mat.storage.value()[mask],
        sparse_sizes=mat.storage.sparse_sizes(),
    )
    return matr, mask
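A minimal sketch of drop_edges in action (toy graph, made up; note the input must carry edge values for the value masking to work):

import torch
from torch_sparse import SparseTensor

row = torch.tensor([0, 0, 1, 2])
col = torch.tensor([1, 2, 2, 0])
adj = SparseTensor(row=row, col=col, value=torch.ones(4), sparse_sizes=(3, 3))

pruned, kept = drop_edges(adj, p=0.3)  # each edge survives with probability 1 - p
print(pruned.nnz(), kept.tolist())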
Example no. 16
    def __init__(self, edge_index_dict, embedding_dim, metapath, walk_length,
                 context_size, walks_per_node=1, num_negative_samples=1,
                 num_nodes_dict=None, sparse=False):
        super(MetaPath2Vec, self).__init__()

        if num_nodes_dict is None:
            num_nodes_dict = {}
            for keys, edge_index in edge_index_dict.items():
                key = keys[0]
                N = int(edge_index[0].max() + 1)
                num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

                key = keys[-1]
                N = int(edge_index[1].max() + 1)
                num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

        adj_dict = {}
        for keys, edge_index in edge_index_dict.items():
            sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
            row, col = edge_index
            adj = SparseTensor(row=row, col=col, sparse_sizes=sizes)
            adj = adj.to('cpu')
            adj_dict[keys] = adj

        assert metapath[0][0] == metapath[-1][-1]
        assert walk_length >= context_size

        self.adj_dict = adj_dict
        self.embedding_dim = embedding_dim
        self.metapath = metapath
        self.walk_length = walk_length
        self.context_size = context_size
        self.walks_per_node = walks_per_node
        self.num_negative_samples = num_negative_samples
        self.num_nodes_dict = num_nodes_dict

        types = set([x[0] for x in metapath]) | set([x[-1] for x in metapath])
        types = sorted(list(types))

        count = 0
        self.start, self.end = {}, {}
        for key in types:
            self.start[key] = count
            count += num_nodes_dict[key]
            self.end[key] = count

        offset = [self.start[metapath[0][0]]]
        offset += [self.start[keys[-1]] for keys in metapath
                   ] * int((walk_length / len(metapath)) + 1)
        offset = offset[:walk_length + 1]
        assert len(offset) == walk_length + 1
        self.offset = torch.tensor(offset)

        self.embedding = Embedding(count, embedding_dim, sparse=sparse)

        self.reset_parameters()
Example no. 17
    def forward(self, data):
        N = data.graph['num_nodes']
        edge_index = data.graph['edge_index']
        if isinstance(edge_index, torch.Tensor):
            row, col = edge_index
            A = SparseTensor(row=row, col=col,
                             sparse_sizes=(N, N)).to_torch_sparse_coo_tensor()
        elif isinstance(edge_index, SparseTensor):
            A = edge_index.to_torch_sparse_coo_tensor()
        else:
            raise TypeError(f'Unsupported edge_index type: {type(edge_index)}')
        logits = self.W(A)
        return logits
Example no. 18
    def __init__(self, edge_index: Union[Tensor, SparseTensor],
                 sizes: List[int], node_idx: Optional[Tensor] = None,
                 num_nodes: Optional[int] = None, return_e_id: bool = True,
                 transform: Optional[Callable] = None, **kwargs):

        edge_index = edge_index.to('cpu')

        if 'collate_fn' in kwargs:
            del kwargs['collate_fn']
        if 'dataset' in kwargs:
            del kwargs['dataset']

        # Save for Pytorch Lightning...
        self.edge_index = edge_index
        self.node_idx = node_idx
        self.num_nodes = num_nodes

        self.sizes = sizes
        self.return_e_id = return_e_id
        self.transform = transform
        self.is_sparse_tensor = isinstance(edge_index, SparseTensor)
        self.__val__ = None

        # Obtain a *transposed* `SparseTensor` instance.
        if not self.is_sparse_tensor:
            if (num_nodes is None and node_idx is not None
                    and node_idx.dtype == torch.bool):
                num_nodes = node_idx.size(0)
            if (num_nodes is None and node_idx is not None
                    and node_idx.dtype == torch.long):
                num_nodes = max(int(edge_index.max()), int(node_idx.max())) + 1
            if num_nodes is None:
                num_nodes = int(edge_index.max()) + 1

            value = torch.arange(edge_index.size(1)) if return_e_id else None
            self.adj_t = SparseTensor(row=edge_index[0], col=edge_index[1],
                                      value=value,
                                      sparse_sizes=(num_nodes, num_nodes)).t()
        else:
            adj_t = edge_index
            if return_e_id:
                self.__val__ = adj_t.storage.value()
                value = torch.arange(adj_t.nnz())
                adj_t = adj_t.set_value(value, layout='coo')
            self.adj_t = adj_t

        self.adj_t.storage.rowptr()

        if node_idx is None:
            node_idx = torch.arange(self.adj_t.sparse_size(0))
        elif node_idx.dtype == torch.bool:
            node_idx = node_idx.nonzero(as_tuple=False).view(-1)

        super().__init__(
            node_idx.view(-1).tolist(), collate_fn=self.sample, **kwargs)
Example no. 19
def test_shadow_k_hop_sampler():
    row = torch.tensor([0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 4, 5, 5])
    col = torch.tensor([1, 2, 3, 0, 2, 0, 1, 4, 5, 0, 2, 5, 2, 4])
    edge_index = torch.stack([row, col], dim=0)
    edge_weight = torch.arange(row.size(0))
    x = torch.randn(6, 16)
    y = torch.randint(3, (6, ), dtype=torch.long)
    data = Data(edge_index=edge_index, edge_weight=edge_weight, x=x, y=y)

    train_mask = torch.tensor([1, 1, 0, 0, 0, 0], dtype=torch.bool)
    loader = ShaDowKHopSampler(data,
                               depth=1,
                               num_neighbors=3,
                               node_idx=train_mask,
                               batch_size=2)
    assert len(loader) == 1

    batch1 = next(iter(loader))
    assert len(batch1) == 7

    assert batch1.batch.tolist() == [0, 0, 0, 0, 1, 1, 1]
    assert batch1.ptr.tolist() == [0, 4, 7]
    assert batch1.root_n_id.tolist() == [0, 5]
    assert batch1.x.tolist() == x[torch.tensor([0, 1, 2, 3, 0, 1, 2])].tolist()
    assert batch1.y.tolist() == y[train_mask].tolist()
    row, col = batch1.edge_index
    assert row.tolist() == [0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 6]
    assert col.tolist() == [1, 2, 3, 0, 2, 0, 1, 0, 5, 6, 4, 6, 4, 5]
    e_id = torch.tensor([0, 1, 2, 3, 4, 5, 6, 9, 0, 1, 3, 4, 5, 6])
    assert batch1.edge_weight.tolist() == edge_weight[e_id].tolist()

    adj_t = SparseTensor(row=edge_index[0],
                         col=edge_index[1],
                         value=edge_weight).t()
    data = Data(adj_t=adj_t, x=x, y=y)

    loader = ShaDowKHopSampler(data,
                               depth=1,
                               num_neighbors=3,
                               node_idx=train_mask,
                               batch_size=2)
    assert len(loader) == 1

    batch2 = next(iter(loader))
    assert len(batch2) == 6

    assert batch1.batch.tolist() == batch2.batch.tolist()
    assert batch1.ptr.tolist() == batch2.ptr.tolist()
    assert batch1.root_n_id.tolist() == batch2.root_n_id.tolist()
    assert batch1.x.tolist() == batch2.x.tolist()
    assert batch1.y.tolist() == batch2.y.tolist()
    row, col, value = batch2.adj_t.t().coo()
    assert batch1.edge_index[0].tolist() == row.tolist()
    assert batch1.edge_index[1].tolist() == col.tolist()
    assert batch1.edge_weight.tolist() == value.tolist()
Example no. 20
def test_ppf_conv():
    x1 = torch.randn(4, 16)
    pos1 = torch.randn(4, 3)
    pos2 = torch.randn(2, 3)
    n1 = F.normalize(torch.rand(4, 3), dim=-1)
    n2 = F.normalize(torch.rand(2, 3), dim=-1)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    local_nn = Seq(Lin(16 + 4, 32), ReLU(), Lin(32, 32))
    global_nn = Seq(Lin(32, 32))
    conv = PPFConv(local_nn, global_nn)
    assert conv.__repr__() == (
        'PPFConv(local_nn=Sequential(\n'
        '  (0): Linear(in_features=20, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=32, bias=True)\n'
        '), global_nn=Sequential(\n'
        '  (0): Linear(in_features=32, out_features=32, bias=True)\n'
        '))')
    out = conv(x1, pos1, n1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, n1, adj.t()), out, atol=1e-6)

    t = '(OptTensor, Tensor, Tensor, Tensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, pos1, n1, edge_index).tolist() == out.tolist()

    t = '(OptTensor, Tensor, Tensor, SparseTensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert torch.allclose(jit(x1, pos1, n1, adj.t()), out, atol=1e-6)

    adj = adj.sparse_resize((4, 2))
    out = conv(x1, (pos1, pos2), (n1, n2), edge_index)
    assert out.size() == (2, 32)
    assert conv((x1, None), (pos1, pos2), (n1, n2),
                edge_index).tolist() == out.tolist()
    assert torch.allclose(conv(x1, (pos1, pos2), (n1, n2), adj.t()),
                          out,
                          atol=1e-6)
    assert torch.allclose(conv((x1, None), (pos1, pos2), (n1, n2), adj.t()),
                          out,
                          atol=1e-6)

    t = '(PairOptTensor, PairTensor, PairTensor, Tensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, None), (pos1, pos2), (n1, n2),
               edge_index).tolist() == out.tolist()

    t = '(PairOptTensor, PairTensor, PairTensor, SparseTensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert torch.allclose(jit((x1, None), (pos1, pos2), (n1, n2), adj.t()),
                          out,
                          atol=1e-6)
Example no. 21
def test_message_passing_with_aggr_module(aggr_module):
    x = torch.randn(4, 8)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = MyAggregatorConv(aggr=aggr_module)
    assert isinstance(conv.aggr_module, aggr.Aggregation)
    out = conv(x, edge_index)
    assert out.size(0) == 4 and out.size(1) in {8, 16}
    assert torch.allclose(conv(x, adj.t()), out)
Example no. 22
def get_sparse_buffer(module, name):
    row = getattr(module, "{}_row".format(name))
    col = getattr(module, "{}_col".format(name))
    val = getattr(module, "{}_val".format(name))
    siz = getattr(module, "{}_size".format(name))
    return SparseTensor(
        row=row,
        col=col,
        value=val,
        sparse_sizes=siz.tolist(),
    )
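A hedged sketch of the registering counterpart this getter implies; the name set_sparse_buffer and its buffer layout are assumptions, not part of the original snippet:

import torch
from torch_sparse import SparseTensor

def set_sparse_buffer(module, name, mat: SparseTensor):
    # Store the COO triplet plus the sizes so get_sparse_buffer() can rebuild it.
    row, col, val = mat.coo()
    module.register_buffer("{}_row".format(name), row)
    module.register_buffer("{}_col".format(name), col)
    module.register_buffer("{}_val".format(name), val)
    module.register_buffer("{}_size".format(name),
                           torch.tensor(mat.sparse_sizes()))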
Example no. 23
def process_adj(data):
    N = data.num_nodes
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    row, col = data.edge_index

    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    deg = adj.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0  # guard isolated nodes (deg == 0)
    return adj, deg_inv_sqrt
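A hedged follow-up showing one common way to combine the two return values into the symmetrically normalized adjacency D^{-1/2} A D^{-1/2} (the toy data object is made up; process_adj's own imports, to_undirected among them, are assumed in scope):

import torch
from torch_geometric.data import Data

data = Data(edge_index=torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]]), num_nodes=3)
adj, deg_inv_sqrt = process_adj(data)
adj_norm = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)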
Example no. 24
def edge_tensor_type_to_adj_type(
    attr: EdgeAttr,
    tensor_tuple: EdgeTensorType,
) -> Adj:
    r"""Converts an EdgeTensorType tensor tuple to a PyG Adj tensor."""
    src, dst = tensor_tuple

    if attr.layout == EdgeLayout.COO:  # COO: (row, col)
        assert src.dim() == 1 and dst.dim() == 1 and src.numel() == dst.numel()

        if src.numel() == 0:
            return torch.empty((2, 0), dtype=torch.long, device=src.device)

        if (src[0].storage().data_ptr() == dst[1].storage().data_ptr()
                and src.storage_offset() < dst.storage_offset()):
            # Do not copy if the tensor tuple is constructed from the same
            # storage (instead, return a view):
            out = torch.empty(0, dtype=src.dtype)
            out.set_(src.storage(),
                     storage_offset=src.storage_offset(),
                     size=(src.size()[0] + dst.size()[0], ))
            return out.view(2, -1)

        return torch.stack([src, dst], dim=0)

    elif attr.layout == EdgeLayout.CSR:  # CSR: (rowptr, col)
        return SparseTensor(rowptr=src,
                            col=dst,
                            is_sorted=True,
                            sparse_sizes=attr.size)

    elif attr.layout == EdgeLayout.CSC:  # CSC: (row, colptr)
        # CSC is a transposed adjacency matrix, so rowptr is the compressed
        # column and col is the uncompressed row.
        sparse_sizes = None if attr.size is None else (attr.size[1],
                                                       attr.size[0])
        return SparseTensor(rowptr=dst,
                            col=src,
                            is_sorted=True,
                            sparse_sizes=sparse_sizes)
    raise ValueError(f"Bad edge layout (got '{attr.layout}')")
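A hedged usage sketch (assumes EdgeAttr and EdgeLayout are importable from torch_geometric.data.graph_store, where this converter lives in recent PyG versions; the CSR arrays are made up):

import torch
from torch_geometric.data.graph_store import EdgeAttr, EdgeLayout

rowptr = torch.tensor([0, 2, 3, 4])
col = torch.tensor([1, 2, 0, 1])
attr = EdgeAttr(('src', 'to', 'dst'), EdgeLayout.CSR, size=(3, 3))
adj = edge_tensor_type_to_adj_type(attr, (rowptr, col))  # -> SparseTensor (CSR)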
Example no. 25
def test_padded_index_select_runtime():
    return  # benchmark disabled by default; delete this line to run it on CUDA
    from torch_geometric.datasets import Planetoid

    device = torch.device('cuda')
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    dataset = Planetoid('/tmp/Planetoid', name='PubMed')
    data = dataset[0]
    row, col = data.edge_index.to(device)

    adj = SparseTensor(row=row, col=col)
    rowcount = adj.storage.rowcount().to(device)
    rowptr = adj.storage.rowptr().to(device)
    binptr = torch.tensor([0, 4, 11, 30, 50, 80, 120, 140, 2000]).to(device)

    x = torch.randn(adj.size(0), 512).to(device)

    data = torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    node_perm, row_perm, col_perm, mask, node_sizes, edge_sizes = data

    out = torch.ops.torch_sparse.padded_index_select(x, col_perm,
                                                     torch.tensor(0.))
    outs = out.split(edge_sizes)
    for out, size in zip(outs, node_sizes):
        print(out.view(size, -1, x.size(-1)).shape)

    for i in range(110):
        if i == 10:
            start.record()
        torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    end.record()
    torch.cuda.synchronize()
    print('padded index', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        out = torch.ops.torch_sparse.padded_index_select(
            x, col_perm, torch.tensor(0.))
        out.split(edge_sizes)
    end.record()
    torch.cuda.synchronize()
    print('padded index select', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        x.index_select(0, col)
    end.record()
    torch.cuda.synchronize()
    print('index_select', start.elapsed_time(end))
Example no. 26
def main(args):
    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('..', 'data', 'Reddit')
    dataset = Reddit(path)
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = data.train_mask.to(device)
    val_mask = data.val_mask.to(device)
    test_mask = data.test_mask.to(device)

    model = GraphSAGE(dataset.num_features, args.n_hidden, dataset.num_classes,
                      args.aggr, F.relu, args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    dur = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features, adj)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        if args.eval:
            acc = evaluate(model, adj, features, labels, val_mask)
        else:
            acc = 0
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} ".
              format(epoch, np.mean(dur), loss.item(), acc))

    if args.eval:
        print()
        acc = evaluate(model, adj, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
Example no. 27
def test_gine_conv():
    x1 = torch.randn(4, 16)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.randn(row.size(0), 16)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    nn = Seq(Lin(16, 32), ReLU(), Lin(32, 32))
    conv = GINEConv(nn, train_eps=True)
    assert conv.__repr__() == (
        'GINEConv(nn=Sequential(\n'
        '  (0): Linear(in_features=16, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=32, bias=True)\n'
        '))')
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()

    if is_full_test():
        t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, edge_index, value).tolist() == out.tolist()
        assert jit(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()

        t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, adj.t()).tolist() == out.tolist()

    adj = adj.sparse_resize((4, 2))
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()

    if is_full_test():
        t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
        assert jit((x1, x2), edge_index, value,
                   size=(4, 2)).tolist() == out1.tolist()
        assert jit((x1, None), edge_index, value,
                   size=(4, 2)).tolist() == out2.tolist()

        t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
        assert jit((x1, None), adj.t()).tolist() == out2.tolist()
Example no. 28
    def __init__(self,
                 edge_index_dict,
                 embedding_dim,
                 metapath,
                 walk_length,
                 context_size,
                 num_nodes_dict,
                 types,
                 type_accs,
                 walks_per_node=1,
                 num_negative_samples=1,
                 sparse=False):
        super(MetaPath2Vec, self).__init__()

        adj_dict = {}
        for keys, edge_index in edge_index_dict.items():
            sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
            row, col = edge_index
            row, col = row - type_accs[keys[0]], col - type_accs[keys[-1]]
            adj = SparseTensor(row=row, col=col, sparse_sizes=sizes)
            adj = adj.to('cpu')
            adj_dict[keys] = adj

        assert metapath[0][0] == metapath[-1][-1]
        assert walk_length >= context_size

        self.adj_dict = adj_dict
        self.embedding_dim = embedding_dim
        self.metapath = metapath
        self.walk_length = walk_length
        self.context_size = context_size
        self.walks_per_node = walks_per_node
        self.num_negative_samples = num_negative_samples
        self.num_nodes_dict = num_nodes_dict

        count = 0
        self.start, self.end = {}, {}
        for key in types:
            self.start[key] = count
            count += num_nodes_dict[key]
            self.end[key] = count

        offset = [self.start[metapath[0][0]]]
        offset += [self.start[keys[-1]] for keys in metapath
                   ] * int((walk_length / len(metapath)) + 1)
        offset = offset[:walk_length + 1]
        assert len(offset) == walk_length + 1
        self.offset = torch.tensor(offset)

        self.embedding = Embedding(count, embedding_dim, sparse=sparse)

        self.reset_parameters()
Example no. 29
def test_point_transformer_conv():
    x1 = torch.rand(4, 16)
    x2 = torch.randn(2, 8)
    pos1 = torch.rand(4, 3)
    pos2 = torch.randn(2, 3)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = PointTransformerConv(in_channels=16, out_channels=32)
    assert str(conv) == 'PointTransformerConv(16, 32)'

    out = conv(x1, pos1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, adj.t()), out, atol=1e-6)

    if is_full_test():
        t = '(Tensor, Tensor, Tensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, pos1, edge_index).tolist() == out.tolist()

        t = '(Tensor, Tensor, SparseTensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, pos1, adj.t()), out, atol=1e-6)

    pos_nn = Sequential(Linear(3, 16), ReLU(), Linear(16, 32))
    attn_nn = Sequential(Linear(32, 32), ReLU(), Linear(32, 32))
    conv = PointTransformerConv(16, 32, pos_nn, attn_nn)

    out = conv(x1, pos1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, adj.t()), out, atol=1e-6)

    conv = PointTransformerConv((16, 8), 32)
    adj = adj.sparse_resize((4, 2))

    out = conv((x1, x2), (pos1, pos2), edge_index)
    assert out.size() == (2, 32)
    assert torch.allclose(conv((x1, x2), (pos1, pos2), adj.t()),
                          out,
                          atol=1e-6)

    if is_full_test():
        t = '(PairTensor, PairTensor, Tensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), (pos1, pos2), edge_index).tolist() == out.tolist()

        t = '(PairTensor, PairTensor, SparseTensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), (pos1, pos2), adj.t()),
                              out,
                              atol=1e-6)
Example no. 30
def test_han_conv():
    x_dict = {
        'author': torch.randn(6, 16),
        'paper': torch.randn(5, 12),
        'term': torch.randn(4, 3)
    }
    edge1 = torch.randint(0, 6, (2, 7), dtype=torch.long)
    edge2 = torch.randint(0, 5, (2, 4), dtype=torch.long)
    edge3 = torch.randint(0, 3, (2, 5), dtype=torch.long)
    edge_index_dict = {
        ('author', 'metapath0', 'author'): edge1,
        ('paper', 'metapath1', 'paper'): edge2,
        ('paper', 'metapath2', 'paper'): edge3,
    }

    adj_t_dict = {}
    for edge_type, edge_index in edge_index_dict.items():
        src_type, _, dst_type = edge_type
        adj_t_dict[edge_type] = SparseTensor(
            row=edge_index[0],
            col=edge_index[1],
            sparse_sizes=(x_dict[src_type].size(0),
                          x_dict[dst_type].size(0))).t()

    metadata = (list(x_dict.keys()), list(edge_index_dict.keys()))
    in_channels = {'author': 16, 'paper': 12, 'term': 3}

    conv = HANConv(in_channels, 16, metadata, heads=2)
    assert str(conv) == 'HANConv(16, heads=2)'
    out_dict1 = conv(x_dict, edge_index_dict)
    assert len(out_dict1) == 3
    assert out_dict1['author'].size() == (6, 16)
    assert out_dict1['paper'].size() == (5, 16)
    assert out_dict1['term'] is None
    del out_dict1['term']
    del x_dict['term']

    out_dict2 = conv(x_dict, adj_t_dict)
    assert len(out_dict1) == len(out_dict2)
    for node_type in out_dict1.keys():
        assert torch.allclose(out_dict1[node_type],
                              out_dict2[node_type],
                              atol=1e-6)

    # non zero dropout
    conv = HANConv(in_channels, 16, metadata, heads=2, dropout=0.1)
    assert str(conv) == 'HANConv(16, heads=2)'
    out_dict1 = conv(x_dict, edge_index_dict)
    assert len(out_dict1) == 2
    assert out_dict1['author'].size() == (6, 16)
    assert out_dict1['paper'].size() == (5, 16)