def __init__(self,
             descriptor_dim,
             sampler=None,
             split='train',
             transform=DefaultTransform,
             build_graph=False,
             cls=False):
    super(FaustFEPts5k, self).__init__()
    self.name = 'FaustFEPts5k'
    self.IDlist = np.arange(10000)
    self.split = split
    if self.split == 'train':
        raise RuntimeError("This dataset is Test Only")
    elif self.split == 'test':
        self.IDlist = self.IDlist
    elif self.split == 'val':
        self.IDlist = self.IDlist[:40]
    self.file_path = '{}/faust/scans/{{0:03d}}_{{0:03d}}.mat'.format(
        PATH_TO_DATA)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.pre_transform = None  # T.NormalizeScale()
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6),
             T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])
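
A possible usage sketch (not from the original source): it assumes FaustFEPts5k is a map-style dataset whose __getitem__ applies self.transform and returns fixed-size Data objects, and that DefaultTransform, helper, and PATH_TO_DATA are provided by the surrounding project.

from torch_geometric.data import DenseDataLoader

# split='train' raises RuntimeError above, so only 'test' / 'val' make sense here.
dataset = FaustFEPts5k(descriptor_dim=128, split='test', build_graph=True)
loader = DenseDataLoader(dataset, batch_size=4)
for batch in loader:
    # With build_graph=True, T.KNNGraph(k=6) + T.ToDense(5000) should yield a
    # dense 5000 x 5000 adjacency per scan (an assumption about __getitem__).
    print(batch.pos.shape, batch.adj.shape)
    break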
Example #2
def __init__(self,
             descriptor_dim,
             sampler=None,
             split='train',
             transform=DefaultTransform,
             cls=False,
             build_graph=False):
    super(SurrealFEPts5k, self).__init__()
    self.name = 'SurrealFEPts5k'
    self.split = split
    if self.split == 'train':
        self.IDlist = IDlist[:, :-(num_test * num_views)].reshape(-1)
    elif self.split == 'test':
        self.IDlist = IDlist[:, -(num_test * num_views):].reshape(-1)
    elif self.split == 'val':
        self.IDlist = IDlist[:, :num_views].reshape(-1)
    self.file_path = '{}/scans/{{0:06d}}/{{1:03d}}.mat'.format(
        PATH_TO_SURREAL)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6),
             T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])
Example #3
def get_dataset(name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        shutil.copytree('../input/smt', path)
    except shutil.Error as e:
        for src, dst, msg in e.args[0]:
            print(dst, src, msg)
    except FileExistsError as e:
        print(e)
    
    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        print('data.x does not exist for this dataset!')
        exit(1)
        # NOTE: the degree-feature fallback below is unreachable because of the
        # exit(1) above; it is left in place from the original template.

        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]

    return dataset
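
A brief usage sketch (illustrative, not from the source): 'SMT' and the batch size are assumed values, and NormalizedDegree is expected to be defined alongside this helper. With sparse=False every graph is padded by T.ToDense, so the result can be batched with PyG's DenseDataLoader.

from torch_geometric.data import DenseDataLoader

dense_dataset = get_dataset('SMT', sparse=False)
loader = DenseDataLoader(dense_dataset, batch_size=32, shuffle=True)
for batch in loader:
    # x: [B, num_nodes, F], adj: [B, num_nodes, num_nodes], mask flags real nodes.
    print(batch.x.shape, batch.adj.shape, batch.mask.shape)
    break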
Example #4
def get_dataset(name, sparse=True, cleaned=False):

    if name == 'node':
        path = osp.join(os.environ['GNN_TRAINING_DATA_ROOT'], name)
        print(path)
        dataset = HitGraphDataset2(path, directed=False, categorical=True)
    else:
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        name)
        dataset = TUDataset(path, name, cleaned=cleaned)
        dataset.data.edge_attr = None

        if dataset.data.x is None:
            max_degree = 0
            degs = []
            for data in dataset:
                degs += [degree(data.edge_index[0], dtype=torch.long)]
                max_degree = max(max_degree, degs[-1].max().item())

            if max_degree < 1000:
                dataset.transform = T.OneHotDegree(max_degree)
            else:
                deg = torch.cat(degs, dim=0).to(torch.float)
                mean, std = deg.mean().item(), deg.std().item()
                dataset.transform = NormalizedDegree(mean, std)

        if not sparse:
            num_nodes = max_num_nodes = 0
            for data in dataset:
                num_nodes += data.num_nodes
                max_num_nodes = max(data.num_nodes, max_num_nodes)

            # Filter out a few really large graphs in order to apply DiffPool.
            if name == 'REDDIT-BINARY':
                num_nodes = min(int(num_nodes / len(dataset) * 1.5),
                                max_num_nodes)
            else:
                num_nodes = min(int(num_nodes / len(dataset) * 5),
                                max_num_nodes)

            indices = []
            for i, data in enumerate(dataset):
                if data.num_nodes <= num_nodes:
                    indices.append(i)
            dataset = dataset[torch.tensor(indices)]

            if dataset.transform is None:
                dataset.transform = T.ToDense(num_nodes)
            else:
                dataset.transform = T.Compose(
                    [dataset.transform,
                     T.ToDense(num_nodes)])

    return dataset
Example #5
    def get_dataset(self, name, sparse=True, dataset_div=None):
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        name)
        try:
            if 'SMT' in name:
                shutil.copytree(osp.join('../input', name.lower()), path)
        except FileExistsError as e:
            print(e)

        dataset = TUDataset(path, name, use_node_attr=True)
        dataset.data.edge_attr = None

        if not sparse:
            num_nodes = max_num_nodes = 0
            limit_num = 1000
            for data in dataset:
                num_nodes += data.num_nodes
                max_num_nodes = max(data.num_nodes, max_num_nodes)

            # Filter out a few really large graphs in order to apply DiffPool
            num_nodes = min(int(num_nodes / len(dataset)), max_num_nodes)
            num_nodes = max(int(num_nodes / len(dataset)), limit_num)
            self.num_nodes = num_nodes
            indices = []
            for i, data in enumerate(dataset):
                if data.num_nodes <= num_nodes:
                    indices.append(i)
            dataset = dataset[torch.tensor(indices)]

            dataset.transform = T.ToDense(num_nodes)

        if dataset_div is not None:
            dataset = dataset.shuffle()[:len(dataset) // dataset_div]

        return dataset.shuffle()
Example #6
def get_adj_list(data, max_adj=None):
    # Build a fixed-width adjacency list for each vertex from the mesh faces.
    data.edge_index = None
    data = T.FaceToEdge(remove_faces=False)(data)
    edge, _ = add_self_loops(data.edge_index)

    data = T.ToDense()(data)
    adj_mat = data.adj

    num_list = adj_mat.sum(1).long().unsqueeze(1)

    if max_adj is None:
        max_adj = num_list.max().item()
    else:
        max_list = torch.full_like(num_list, max_adj).long()
        num_list = torch.where(num_list > max_adj, max_list, num_list)

    adj_list = torch.full_like(adj_mat, -1).long()
    N = data.pos.shape[0]

    for n in range(N):
        adj = adj_mat[n].nonzero().view(-1)  # neighbour indices of vertex n
        num = num_list[n]
        adj_list[n, :num] = adj[:num]

    adj_list = torch.cat([adj_list[:, :max_adj], num_list],
                         dim=1)  # N * max_adj
    return adj_list.type_as(edge), edge
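
A toy call of get_adj_list (a sketch; the tetrahedron below is made up for illustration): the input is a torch_geometric Data mesh with pos (vertex coordinates) and face (3 x num_faces triangle indices).

import torch
from torch_geometric.data import Data

pos = torch.tensor([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
face = torch.tensor([[0, 0, 0, 1],
                     [1, 1, 2, 2],
                     [2, 3, 3, 3]])
mesh = Data(pos=pos, face=face)
adj_list, edge_index = get_adj_list(mesh, max_adj=3)
# Each row holds up to max_adj neighbour indices (padded with -1) plus a final
# column with the neighbour count; edge_index additionally contains self loops.
print(adj_list)
print(edge_index.shape)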
Example #7
def graph_kernel_dataset(name, path, sparse=True):
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset
Example #8
def get_dataset(name, sparse=True):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset
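
A hedged illustration of the degree-feature fallback above: for TU datasets without node attributes (IMDB-BINARY is used here purely as an example), the helper attaches one-hot degree features, so data.x becomes available downstream.

dataset = get_dataset('IMDB-BINARY', sparse=True)
data = dataset[0]
print(data.x.shape)  # [num_nodes, max_degree + 1] after T.OneHotDegree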
Example #9
def test(loader, max_nodes):
    model.eval()

    correct = 0
    loss_all = 0
    loader.dataset.transform = T.Compose(
        [delete_edge_attr, T.ToDense(max_nodes[0])])
    i = 0
    for data in loader:
        data.y = data.y.squeeze(-1)
        data = data.to(device)
        output = model(data)[0]
        pred = output.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
        loss = F.nll_loss(output, data.y)
        loss_all += data.x.size(0) * loss.item()
        i += 1
        if i < len(max_nodes):  # guard against running past the per-batch list
            loader.dataset.transform = T.Compose(
                [delete_edge_attr, T.ToDense(max_nodes[i])])

    return correct / len(loader.dataset), loss_all / len(loader.dataset)
Example #10
def get_dataset(name, sparse=True, cleaned=False, normalize=False):
    dataset = TUDataset(os.path.join('./data', name),
                        name,
                        use_node_attr=True,
                        cleaned=cleaned)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    elif normalize:
        # Standardize continuous node attributes feature-wise.
        dataset.data.x -= torch.mean(dataset.data.x, dim=0)
        dataset.data.x /= torch.std(dataset.data.x, dim=0)

    if not sparse:
        max_num_nodes = 0
        for data in dataset:
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        if dataset.transform is None:
            dataset.transform = T.ToDense(max_num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform,
                 T.ToDense(max_num_nodes)])

    return dataset
Example #11
def train(loader, max_nodes):
    model.train()

    loss_all = 0
    loader.dataset.transform = T.Compose(
        [delete_edge_attr, T.ToDense(max_nodes[0])])
    i = 0
    for data in loader:
        data = data.to(device)
        data.y = data.y.squeeze(-1)
        optimizer.zero_grad()
        output, reg = model(data)

        loss = F.nll_loss(output, data.y) + reg
        loss.backward()
        loss_all += data.x.size(0) * loss.item()
        optimizer.step()
        i += 1
        if i < len(max_nodes):  # guard against running past the per-batch list
            loader.dataset.transform = T.Compose(
                [delete_edge_attr, T.ToDense(max_nodes[i])])

    return loss_all / len(loader.dataset)
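
A hedged sketch of the driver loop these two helpers imply; model, optimizer, device, delete_edge_attr, and the per-batch max_nodes list are assumed to be defined elsewhere in the original script, and the loader names are illustrative.

for epoch in range(1, 101):
    train_loss = train(train_loader, max_nodes_per_batch)
    test_acc, test_loss = test(test_loader, max_nodes_per_batch)
    print('Epoch {:03d}, train loss {:.4f}, test acc {:.4f}'.format(
        epoch, train_loss, test_acc))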
Example #12
def test_baseDataset_Loader(filted_dataset=None):
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    shutil.copytree('../input/smt', root)
    dataset = TUDataset(root, 'SMT')

    assert len(dataset) == 2688
    assert dataset.num_features == 20
    assert dataset.num_classes == 2
    assert dataset.__repr__() == 'SMT(2688)'
    assert dataset[0].keys == ['x', 'edge_index', 'y']  # ==len(data.keys)
    assert len(dataset.shuffle()) == 2688

    loader = DataLoader(dataset, batch_size=len(dataset))
    assert loader.dataset.__repr__() == 'SMT(2688)'
    for batch in loader:
        assert batch.num_graphs == 2688
        assert batch.num_nodes == sum([data.num_nodes
                                       for data in dataset])  # 2788794
        assert batch.num_edges == sum([data.num_edges
                                       for data in dataset])  # 13347768
        assert batch.keys == ['x', 'edge_index', 'y', 'batch']

    num_nodes = sum(dataset.data.num_nodes)
    max_num_nodes = max(dataset.data.num_nodes)
    num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

    assert num_nodes == 5187
    assert max_num_nodes == 34623

    indices = []
    for i, data in enumerate(dataset):
        if data.num_nodes < num_nodes:
            indices.append(i)

    if not filted_dataset:
        filted_dataset = dataset[torch.tensor(indices)]
        filted_dataset.transform = T.ToDense(num_nodes)  # add 'adj' attribute

    assert ('adj' in dataset[0]) is False
    assert ('adj' in filted_dataset[0]) is True
Example #13
class MyTransform(object):
    def __call__(self, data):
        # Only use node attributes.
        data.x = data.x[:, :-3]

        # Add self loops.
        arange = torch.arange(data.adj.size(-1), dtype=torch.long)
        data.adj[arange, arange] = 1

        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ENZYMES_d')
dataset = TUDataset(path,
                    name='ENZYMES',
                    transform=T.Compose([T.ToDense(max_nodes),
                                         MyTransform()]),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)
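
For orientation, a quick check of what these dense loaders yield (a sketch): T.ToDense(max_nodes) pads every graph to max_nodes and adds a node mask, and MyTransform keeps only the attribute columns and adds self loops to adj.

for batch in train_loader:
    print(batch.x.shape)     # [batch_size, max_nodes, num_node_features]
    print(batch.adj.shape)   # [batch_size, max_nodes, max_nodes]
    print(batch.mask.shape)  # [batch_size, max_nodes]
    break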


class GNN(torch.nn.Module):
    def __init__(self,
                 in_channels,
Example #14
import os.path as osp

import torch
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.nn import DenseSAGEConv, dense_diff_pool

max_nodes = 150


class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= max_nodes


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                'PROTEINS_dense')
dataset = TUDataset(path, name='PROTEINS', transform=T.ToDense(max_nodes),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)


class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 normalize=False, add_loop=False, lin=True):
        super(GNN, self).__init__()