def __init__(self, descriptor_dim, sampler=None, split='train',
             transform=DefaultTransform, build_graph=False, cls=False):
    super(FaustFEPts5k, self).__init__()
    self.name = 'FaustFEPts5k'
    self.IDlist = np.arange(10000)
    self.split = split
    if self.split == 'train':
        raise RuntimeError("This dataset is Test Only")
    elif self.split == 'test':
        self.IDlist = self.IDlist  # keep the full ID list
    elif self.split == 'val':
        self.IDlist = self.IDlist[:40]
    self.file_path = '{}/faust/scans/{{0:03d}}_{{0:03d}}.mat'.format(
        PATH_TO_DATA)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.pre_transform = None  # T.NormalizeScale()
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6), T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])
def __init__(self, descriptor_dim, sampler=None, split='train',
             transform=DefaultTransform, cls=False, build_graph=False):
    super(SurrealFEPts5k, self).__init__()
    self.name = 'SurrealFEPts5k'
    self.split = split
    if self.split == 'train':
        self.IDlist = IDlist[:, :-(num_test * num_views)].reshape(-1)
    elif self.split == 'test':
        self.IDlist = IDlist[:, -(num_test * num_views):].reshape(-1)
    elif self.split == 'val':
        self.IDlist = IDlist[:, :num_views].reshape(-1)
    self.file_path = '{}/scans/{{0:06d}}/{{1:03d}}.mat'.format(
        PATH_TO_SURREAL)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6), T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])
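# The SurrealFEPts5k __init__ above slices module-level globals (IDlist,
# num_test, num_views, PATH_TO_SURREAL) defined elsewhere in the file. A
# minimal sketch of that setup -- every value below is an assumption for
# illustration, not taken from the source:
import numpy as np

num_views = 20   # assumed: rendered views per subject
num_test = 10    # assumed: subjects held out for the test split
PATH_TO_SURREAL = '/path/to/surreal'  # assumed data root
# assumed layout: rows index sequences, columns index subject/view combos,
# so the splits above slice off the last num_test * num_views columns.
IDlist = np.arange(100 * 50 * num_views).reshape(100, 50 * num_views)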
def get_dataset(name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        shutil.copytree('../input/smt', path)
    except shutil.Error as e:
        for src, dst, msg in e.args[0]:
            print(dst, src, msg)
    except FileExistsError as e:
        print(e)
    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        print('confirm that data.x does not exist!!')
        exit(1)
        # Degree-encoding fallback; unreachable after exit(1).
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]
        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])
    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]
    return dataset
def get_dataset(name, sparse=True, cleaned=False):
    if name == 'node':
        path = osp.join(os.environ['GNN_TRAINING_DATA_ROOT'], name)
        print(path)
        dataset = HitGraphDataset2(path, directed=False, categorical=True)
    else:
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        name)
        dataset = TUDataset(path, name, cleaned=cleaned)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]
        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])
    return dataset
def get_dataset(self, name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        if 'SMT' in name:
            shutil.copytree(osp.join('../input', name.lower()), path)
    except FileExistsError as e:
        print(e)
    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None
    if not sparse:
        num_nodes = max_num_nodes = 0
        limit_num = 1000
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        # Filter out a few really large graphs in order to apply DiffPool:
        # cap at the mean graph size, but keep at least limit_num nodes.
        num_nodes = min(int(num_nodes / len(dataset)), max_num_nodes)
        num_nodes = max(num_nodes, limit_num)
        self.num_nodes = num_nodes
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]
        dataset.transform = T.ToDense(num_nodes)
    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]
    return dataset.shuffle()
def get_adj_list(data, max_adj=None):
    data.edge_index = None
    data = T.FaceToEdge(remove_faces=False)(data)
    edge, _ = add_self_loops(data.edge_index)
    data = T.ToDense()(data)
    adj_mat = data.adj
    num_list = adj_mat.sum(1).long().unsqueeze(1)
    if max_adj is None:
        max_adj = num_list.max().item()
    else:
        max_list = torch.full_like(num_list, max_adj).long()
        num_list = torch.where(num_list > max_adj, max_list, num_list)
    adj_list = torch.full_like(adj_mat, -1).long()
    N = data.pos.shape[0]
    for n in range(N):
        adj = adj_mat[n].nonzero()
        num = num_list[n]
        adj_list[n, :num] = adj.t()[:, :num]
    # N * (max_adj + 1): padded neighbor ids plus the neighbor count.
    adj_list = torch.cat([adj_list[:, :max_adj], num_list], dim=1)
    return adj_list.type_as(edge), edge
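# A minimal usage sketch for get_adj_list on a toy mesh; the tetrahedron
# below is an illustrative stand-in, not data from the source:
import torch
from torch_geometric.data import Data

pos = torch.rand(4, 3)  # four vertices
face = torch.tensor([[0, 0, 0, 1],
                     [1, 1, 2, 2],
                     [2, 3, 3, 3]])  # the four triangular faces
mesh = Data(pos=pos, face=face)
adj_list, edge_index = get_adj_list(mesh)
# adj_list is (N, max_adj + 1): padded neighbor indices plus the true
# neighbor count in the last column; edge_index includes self-loops.
print(adj_list.shape, edge_index.shape)  # torch.Size([4, 4]) torch.Size([2, 16])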
def graph_kernel_dataset(name, path, sparse=True):
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        # torch.tensor (long), not torch.Tensor (float): float tensors are
        # not valid dataset indices.
        dataset = dataset[torch.tensor(indices)]
        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])
    return dataset
def get_dataset(name, sparse=True):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]
        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])
    return dataset
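# Hedged usage sketch for get_dataset above; 'PROTEINS' is just an example
# TUDataset name and the batch size is arbitrary:
from torch_geometric.data import DenseDataLoader

dense_dataset = get_dataset('PROTEINS', sparse=False)
loader = DenseDataLoader(dense_dataset, batch_size=32, shuffle=True)
for batch in loader:
    # After T.ToDense, every graph carries a fixed-size adjacency in
    # batch.adj and a validity mask in batch.mask.
    print(batch.x.shape, batch.adj.shape, batch.mask.shape)
    break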
def test(loader, max_nodes):
    model.eval()
    correct = 0
    loss_all = 0
    loader.dataset.transform = T.Compose(
        [delete_edge_attr, T.ToDense(max_nodes[0])])
    i = 0
    for data in loader:
        data.y = data.y.squeeze(-1)
        data = data.to(device)
        output = model(data)[0]
        pred = output.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
        loss = F.nll_loss(output, data.y)
        loss_all += data.x.size(0) * loss.item()
        i += 1
        # Re-densify for the next batch; this requires max_nodes to hold an
        # entry one past the final batch index.
        loader.dataset.transform = T.Compose(
            [delete_edge_attr, T.ToDense(max_nodes[i])])
    return correct / len(loader.dataset), loss_all / len(loader.dataset)
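# Both test() above and train() below compose a `delete_edge_attr` callable
# into the transform. Its definition is not shown in this section; a minimal
# sketch of what it presumably does (an assumption, not the source's code):
def delete_edge_attr(data):
    data.edge_attr = None  # drop edge features before densifying
    return data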
def get_dataset(name, sparse=True, cleaned=False, normalize=False):
    dataset = TUDataset(os.path.join('./data', name), name,
                        use_node_attr=True, cleaned=cleaned)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    elif normalize:
        dataset.data.x -= torch.mean(dataset.data.x, dim=0)
        dataset.data.x /= torch.std(dataset.data.x, dim=0)
    if not sparse:
        max_num_nodes = 0
        for data in dataset:
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        if dataset.transform is None:
            dataset.transform = T.ToDense(max_num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(max_num_nodes)])
    return dataset
def train(loader, max_nodes):
    model.train()
    loss_all = 0
    loader.dataset.transform = T.Compose(
        [delete_edge_attr, T.ToDense(max_nodes[0])])
    i = 0
    for data in loader:
        data = data.to(device)
        data.y = data.y.squeeze(-1)
        optimizer.zero_grad()
        output, reg = model(data)
        loss = F.nll_loss(output, data.y) + reg
        loss.backward()
        loss_all += data.x.size(0) * loss.item()
        optimizer.step()
        i += 1
        # Same per-batch re-densification as in test().
        loader.dataset.transform = T.Compose(
            [delete_edge_attr, T.ToDense(max_nodes[i])])
    return loss_all / len(loader.dataset)
def test_baseDataset_Loader(filted_dataset=None):
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    shutil.copytree('../input/smt', root)
    dataset = TUDataset(root, 'SMT')
    assert len(dataset) == 2688
    assert dataset.num_features == 20
    assert dataset.num_classes == 2
    assert dataset.__repr__() == 'SMT(2688)'
    assert dataset[0].keys == ['x', 'edge_index', 'y']  # == len(data.keys)
    assert len(dataset.shuffle()) == 2688

    loader = DataLoader(dataset, batch_size=len(dataset))
    assert loader.dataset.__repr__() == 'SMT(2688)'
    for batch in loader:
        assert batch.num_graphs == 2688
        assert batch.num_nodes == sum([data.num_nodes for data in dataset])  # 2788794
        assert batch.num_edges == sum([data.num_edges for data in dataset])  # 13347768
        assert batch.keys == ['x', 'edge_index', 'y', 'batch']

    num_nodes = sum([data.num_nodes for data in dataset])
    max_num_nodes = max([data.num_nodes for data in dataset])
    num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
    assert num_nodes == 5187
    assert max_num_nodes == 34623

    indices = []
    for i, data in enumerate(dataset):
        if data.num_nodes < num_nodes:
            indices.append(i)
    if not filted_dataset:
        filted_dataset = dataset[torch.tensor(indices)]
    filted_dataset.transform = T.ToDense(num_nodes)  # adds an 'adj' attribute
    assert ('adj' in dataset[0]) is False
    assert ('adj' in filted_dataset[0]) is True
class MyTransform(object):
    def __call__(self, data):
        # Only use node attributes.
        data.x = data.x[:, :-3]
        # Add self-loops.
        arange = torch.arange(data.adj.size(-1), dtype=torch.long)
        data.adj[arange, arange] = 1
        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                'ENZYMES_d')
dataset = TUDataset(path, name='ENZYMES',
                    transform=T.Compose([T.ToDense(max_nodes), MyTransform()]),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)


class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 normalize=False, add_loop=False, lin=True):
        super(GNN, self).__init__()
import os.path as osp

import torch
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.nn import DenseSAGEConv, dense_diff_pool

max_nodes = 150


class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= max_nodes


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                'PROTEINS_dense')
dataset = TUDataset(path, name='PROTEINS', transform=T.ToDense(max_nodes),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)


class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 normalize=False, add_loop=False, lin=True):
        super(GNN, self).__init__()
from torch_geometric.data import DenseDataLoader
from torch_geometric.nn import DenseSAGEConv, dense_diff_pool

max_nodes = 150


class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= max_nodes


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                'PROTEINS_dense')
dataset = TUDataset(path, name='PROTEINS', transform=T.ToDense(max_nodes),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)


class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 normalize=False, add_loop=False, lin=True):
        super(GNN, self).__init__()
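# Both GNN snippets above truncate before the model body. For context, a
# minimal sketch of how dense_diff_pool is typically applied to one dense
# batch; the shapes and the cluster count are assumptions, not the source's
# model:
import torch
from torch_geometric.nn import dense_diff_pool

batch_size, num_clusters = 20, 15
x = torch.rand(batch_size, max_nodes, 3)             # dense node features
adj = torch.rand(batch_size, max_nodes, max_nodes)   # dense adjacency
mask = torch.ones(batch_size, max_nodes, dtype=torch.bool)  # valid-node mask
s = torch.rand(batch_size, max_nodes, num_clusters)  # soft cluster assignments

x_p, adj_p, link_loss, ent_loss = dense_diff_pool(x, adj, s, mask)
# x_p:   (batch_size, num_clusters, 3) pooled features
# adj_p: (batch_size, num_clusters, num_clusters) pooled adjacency
# link_loss and ent_loss are auxiliary terms added to the training loss.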