def _loadDataset(self, dataset: DataSet, device: torch.device) -> torch_geometric.data.Data:
    """Load the requested dataset, move its graph to ``device`` and return it.

    Side effects: sets ``self.num_features`` and ``self.num_classes``; for the
    TWITTER dataset additionally loads the pre-trained GloVe matrix into
    ``self.glove_matrix``.

    :param dataset: which dataset to load (PUBMED / CORA / CITESEER / TWITTER)
    :param device: device the returned graph (and GloVe matrix) is moved to
    :return: the first (and only) graph of the dataset
    :raises ValueError: if ``dataset`` is not one of the supported members
    """
    dataset_path = osp.join(getGitPath(), 'datasets')
    if dataset in (DataSet.PUBMED, DataSet.CORA, DataSet.CITESEER):
        dataset = Planetoid(dataset_path, dataset.string())
    elif dataset is DataSet.TWITTER:
        twitter_glove_path = osp.join(dataset_path, 'twitter', 'glove.pkl')
        if not osp.exists(twitter_glove_path):
            # The TWITTER dataset cannot be downloaded automatically.
            exit("Go to README and follow the download instructions to the TWITTER dataset")
        dataset = TwitterDataset(osp.dirname(twitter_glove_path))
        with open(twitter_glove_path, 'rb') as file:
            glove_matrix = pickle.load(file)
        self.glove_matrix = torch.tensor(glove_matrix, dtype=torch.float32).to(device)
    else:
        # Previously an unsupported member fell through and crashed on
        # ``dataset[0]``; fail with a clear message instead.
        raise ValueError(f'Unsupported dataset: {dataset}')

    # Bug fix: the graph was moved to ``self.device`` while the GloVe matrix
    # used the ``device`` argument; use the argument consistently.
    data = dataset[0].to(device)
    setattr(data, 'num_classes', dataset.num_classes)
    self.num_features = data.num_features
    self.num_classes = dataset.num_classes
    return data
def test_resample_disjoint(self):
    """Accessing the same training graph twice (resample period 1) must yield
    edge labels of identical shape and identical values."""
    pyg_dataset = Planetoid("./cora", "Cora")
    base = GraphDataset.pyg_to_graphs(pyg_dataset)[0]
    graph = Graph(
        node_label=base.node_label,
        node_feature=base.node_feature,
        edge_index=base.edge_index,
        edge_feature=base.edge_feature,
        directed=False,
    )
    dataset = GraphDataset(
        [graph],
        task="link_pred",
        edge_train_mode="disjoint",
        edge_message_ratio=0.8,
        resample_disjoint=True,
        resample_disjoint_period=1,
    )
    train_split, _, _ = dataset.split(split_ratio=[0.5, 0.2, 0.3])

    first_access = train_split[0]
    second_access = train_split[0]
    self.assertEqual(first_access.edge_label_index.shape[1],
                     second_access.edge_label_index.shape[1])
    self.assertTrue(torch.equal(first_access.edge_label,
                                second_access.edge_label))
def learning_methods_on_graphs():
    """Train a GNN on Cora for 1000 epochs, then print the test accuracy."""
    name = 'Cora'
    root = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'data', name)
    dataset = Planetoid(root=root, name=name)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(dataset).to(device)
    data = dataset[0].to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    # Training loop: full-batch NLL loss on the training mask only.
    model.train()
    for epoch in range(1000):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        print("{} {}".format(epoch, loss.item()))

    # Evaluation: argmax prediction, accuracy over the test mask.
    model.eval()
    pred = model(data).max(dim=1)[1]
    correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))
def load_dataset(dataset):
    """Load a node-classification dataset by name.

    :param dataset: one of 'cora', 'citeseer', 'pubmed', 'reddit', 'corafull'
    :return: ``(data, num_features, num_classes)`` where ``data.adj`` holds a
        dense (N, N) adjacency matrix built from ``edge_index``
    :raises ValueError: for an unknown dataset name (previously this crashed
        later with an AttributeError on the string)
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        # Dense adjacency from the COO edge list.
        data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
        col, row = data.edge_index
        data.adj[col, row] = 1
        return data, num_features, num_classes
    elif dataset == 'reddit':
        dataset = Reddit(path)
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset))

    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]
    # Reddit/CoraFull carry no standard split here; generate one.
    data.train_mask, data.val_mask, data.test_mask = generate_split(
        data, num_classes)
    # Bug fix: ``data.x.size[0]`` subscripted the bound method (TypeError);
    # ``size`` must be called: ``data.x.size(0)``.
    data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
    col, row = data.edge_index
    data.adj[col, row] = 1
    return data, num_features, num_classes
def load_dataset(root: str, name: str, *args, **kwargs) -> Dataset:
    r"""Returns a variety of datasets according to :obj:`name`."""
    lowered = name.lower()

    if 'karate' in lowered:
        from torch_geometric.datasets import KarateClub
        return KarateClub(*args, **kwargs)

    if lowered in ['cora', 'citeseer', 'pubmed']:
        from torch_geometric.datasets import Planetoid
        return Planetoid(osp.join(root, 'Planetoid', name), name,
                         *args, **kwargs)

    if name in ['BZR', 'ENZYMES', 'IMDB-BINARY', 'MUTAG']:
        from torch_geometric.datasets import TUDataset
        return TUDataset(osp.join(root, 'TUDataset'), name, *args, **kwargs)

    if name in ['ego-facebook', 'soc-Slashdot0811', 'wiki-vote']:
        from torch_geometric.datasets import SNAPDataset
        return SNAPDataset(osp.join(root, 'SNAPDataset'), name,
                           *args, **kwargs)

    if lowered in ['bashapes']:
        from torch_geometric.datasets import BAShapes
        return BAShapes(*args, **kwargs)

    if lowered in ['dblp']:
        from torch_geometric.datasets import DBLP
        return DBLP(osp.join(root, 'DBLP'), *args, **kwargs)

    if name in ['citationCiteseer', 'illc1850']:
        from torch_geometric.datasets import SuiteSparseMatrixCollection
        path = osp.join(root, 'SuiteSparseMatrixCollection')
        return SuiteSparseMatrixCollection(path, name=name, *args, **kwargs)

    if 'elliptic' in lowered:
        from torch_geometric.datasets import EllipticBitcoinDataset
        path = osp.join(root, 'EllipticBitcoinDataset')
        return EllipticBitcoinDataset(path, *args, **kwargs)

    raise NotImplementedError
def load_data(
        dataset="Cora",
        supervised=True,
):
    """Load a Coauthor/Amazon/Planetoid dataset with normalized features.

    Supports semi-supervised (the dataset's public split) and supervised mode.

    :param dataset: one of "CS", "Physics", "Computers", "Photo",
        "Cora", "Citeseer", "Pubmed"
    :param supervised: if True, overwrite the split with a dense one:
        all but the last 1000 nodes train, next 500 val, last 500 test
    :return: the dataset object. NOTE(review): the mutated ``data`` object is
        NOT returned; whether the new masks persist depends on ``dataset[0]``
        sharing tensor storage with the dataset (true in older
        torch_geometric) — confirm against the installed version.
    """
    # NOTE(review): the transform is passed as the third positional argument;
    # in newer torch_geometric Planetoid's third positional is ``split`` —
    # verify the pinned version, or pass transform= by keyword.
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    if supervised:
        # uint8 masks are the legacy (pre-bool) PyG mask dtype.
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.train_mask[:-1000] = 1
        data.val_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.val_mask[-1000:-500] = 1
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.test_mask[-500:] = 1
    # Derive the class count from the labels themselves.
    data.num_classes = data.y.max().item() + 1
    return dataset
def test_split(self):
    """Default node split is 80/10/10; link splits follow custom ratios with
    doubled (bidirectional) edge counts."""
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # Node-level split with the default 0.8/0.1/rest ratio.
    num_nodes = dg.num_nodes
    expected_train = int(0.8 * num_nodes)
    expected_val = int(0.1 * num_nodes)
    expected_test = num_nodes - expected_train - expected_val
    node_splits = dg.split()
    expected_nodes = (expected_train, expected_val, expected_test)
    for split, expected in zip(node_splits, expected_nodes):
        self.assertEqual(split.node_label_index.shape[0], expected)

    # Link-prediction splits with several custom ratios.
    num_edges = dg.num_edges
    for ratio in ([0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]):
        link_splits = dg.split(task="link_pred", split_ratio=ratio)
        first = int(ratio[0] * num_edges)
        second = int(ratio[1] * num_edges)
        expected_edges = (
            2 * first,
            2 * second,
            2 * (num_edges - first - second),
        )
        for split, expected in zip(link_splits, expected_edges):
            self.assertEqual(split.edge_label_index.shape[1], expected)
def get_dataset(name: str, use_lcc: bool = True) -> InMemoryDataset:
    """Load a benchmark dataset, optionally restricted to its largest
    connected component (LCC) with freshly zeroed boolean split masks."""
    path = os.path.join(DATA_PATH, name)
    if name in ['Cora', 'Citeseer', 'Pubmed']:
        dataset = Planetoid(path, name)
    elif name in ['Computers', 'Photo']:
        dataset = Amazon(path, name)
    elif name == 'CoauthorCS':
        dataset = Coauthor(path, 'CS')
    else:
        raise Exception('Unknown dataset.')

    if use_lcc:
        lcc = get_largest_connected_component(dataset)

        x_new = dataset.data.x[lcc]
        y_new = dataset.data.y[lcc]

        # Keep only edges whose both endpoints lie inside the LCC, then
        # remap node ids to the compacted range.
        row, col = dataset.data.edge_index.numpy()
        kept = [[i, j] for i, j in zip(row, col) if i in lcc and j in lcc]
        kept = remap_edges(kept, get_node_mapper(lcc))

        n = y_new.size()[0]
        dataset.data = Data(
            x=x_new,
            edge_index=torch.LongTensor(kept),
            y=y_new,
            train_mask=torch.zeros(n, dtype=torch.bool),
            test_mask=torch.zeros(n, dtype=torch.bool),
            val_mask=torch.zeros(n, dtype=torch.bool),
        )
    return dataset
def __init__(self):
    """Initialize the PubMed dataset rooted at ``../../data/PubMed``.

    Bug fix: the transform was passed as the third positional argument; in
    current torch_geometric ``Planetoid.__init__`` the third positional
    parameter is ``split``, so the transform must be given by keyword
    (this also matches the sibling CiteSeer dataset class in this file).
    """
    dataset = "PubMed"
    path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data",
                    dataset)
    if not osp.exists(path):
        # Materialize the dataset on disk before the parent constructor runs.
        Planetoid(path, dataset, transform=T.TargetIndegree())
    super(PubMedDataset, self).__init__(path, dataset,
                                        transform=T.TargetIndegree())
def test_citeseer():
    """Exercise the Citeseer dataset under the public, full and random splits
    and verify its documented statistics."""
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))

    # --- public split -----------------------------------------------------
    dataset = Planetoid(root, 'Citeseer')
    assert len(dataset) == 1
    assert dataset.__repr__() == 'Citeseer()'

    loader = DataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert data.num_graphs == 1
        assert data.num_nodes == 3327
        assert data.num_edges / 2 == 4552

        assert len(data) == 7
        assert list(data.x.size()) == [data.num_nodes, 3703]
        assert list(data.y.size()) == [data.num_nodes]
        assert data.y.max() + 1 == 6
        assert data.train_mask.sum() == 6 * 20
        assert data.val_mask.sum() == 500
        assert data.test_mask.sum() == 1000
        # Splits must be pairwise disjoint.
        assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0
        assert list(data.batch.size()) == [data.num_nodes]

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    # --- full split: everything outside val/test is train -----------------
    data = Planetoid(root, 'Citeseer', split='full')[0]
    assert data.val_mask.sum() == 500
    assert data.test_mask.sum() == 1000
    assert data.train_mask.sum() == data.num_nodes - 1500
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    # --- random split with custom sizes ------------------------------------
    dataset = Planetoid(root, 'Citeseer', split='random',
                        num_train_per_class=11, num_val=29, num_test=41)
    data = dataset[0]
    assert data.train_mask.sum() == dataset.num_classes * 11
    assert data.val_mask.sum() == 29
    assert data.test_mask.sum() == 41
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    shutil.rmtree(root)
def prepare_data(self):
    """Download/load the Planetoid dataset named ``self.NAME`` and cache its
    single graph on the instance."""
    root = osp.join(osp.dirname(osp.realpath(__file__)), "data", self.NAME)
    print(root)
    self.dataset = Planetoid(root, self.NAME, transform=self._transform)
    self.data = self.dataset[0]
    print(self.dataset)
def load_dataset(name):
    """Load a dataset by (case-insensitive) name.

    :param name: dataset name; 'cora', 'citeseer' and 'pubmed' are supported
    :raises NotImplementedError: for 'reddit' (not yet supported)
    :raises ValueError: for any other unknown name (previously the function
        fell through and silently returned ``None``)
    """
    name = name.lower()
    if name in ['cora', 'citeseer', 'pubmed']:
        return Planetoid(root=name, name=name, pre_transform=pre_transform)
    elif name == 'reddit':
        # TODO: add Reddit support.
        raise NotImplementedError
    raise ValueError('Unknown dataset: {}'.format(name))
def get_data2(folder="node_classify/cora", data_name="cora"):
    """Load a Planetoid dataset with a target in-degree edge transform."""
    # Alternatives kept for reference:
    #   pre_transform=T.KNNGraph(k=6)
    #   transform=T.NormalizeFeatures()
    return Planetoid(root=folder, name=data_name,
                     transform=T.TargetIndegree())
def get_dataset(name):
    """Fetch a Planetoid or Amazon dataset by name.

    Relies on the module-level ``path`` as the storage prefix.
    """
    if name in ['Cora', 'Citeseer', 'Pubmed']:
        return Planetoid(path + name, name)
    if name in ['Computers', 'Photo']:
        return Amazon(path + name, name)
    raise Exception('Unknown dataset.')
def get_data(dataset_name, dataset_dir):
    """Load a feature-normalized Planetoid dataset from its lowercase short
    name ('cora' | 'citeseer' | 'pubmed')."""
    canonical = {'cora': 'Cora', 'citeseer': 'CiteSeer',
                 'pubmed': 'PubMed'}[dataset_name]
    root = path.join(dataset_dir, canonical)
    return Planetoid(root, canonical, transform=T.NormalizeFeatures())
def main():
    """Entry point: parse CLI args and train a node-level model on Cora."""
    args = arg_parse()
    if args.dataset != 'cora':
        raise RuntimeError('Unknown datasets')
    dataset = Planetoid(root='/tmp/Cora', name='Cora')
    train(dataset, 'node', args)
def load_pubmed():
    """Return (NetworkX graph, adjacency matrix, label list) for PubMed.

    NOTE(review): nodes without any incident edge are absent from ``G``
    (edges-only construction) — confirm callers expect that.
    """
    from torch_geometric.datasets import Planetoid
    data = Planetoid('./data/pubmed', 'Pubmed')[0]
    G = nx.Graph()
    G.add_edges_from(data.edge_index.numpy().T.tolist())
    return G, nx.adjacency_matrix(G), data.y.numpy().tolist()
def load_planetoid(dataset):
    """Load a Planetoid dataset (self-loops added) and return it together
    with its single graph."""
    assert dataset in ['Cora', 'CiteSeer', 'PubMed']
    root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'Datasets',
                    'NodeData')
    loaded = Planetoid(root, dataset,
                       transform=T.Compose([T.AddSelfLoops()]))
    return loaded, loaded[0]
def __init__(self, name):
    """Load the Planetoid dataset called ``name`` with normalized features.

    Bug fix: the dataset name was hard-coded to "Cora" even though the
    storage path was derived from ``name`` — use ``name`` for both. The
    transform is also passed by keyword so it cannot bind to the ``split``
    positional parameter of newer torch_geometric versions.
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    self.dataset = Planetoid(path, name, transform=T.NormalizeFeatures())
    data = self.dataset[0]
    # Strip the supervision signals; this wrapper tracks a reconstruction
    # objective instead (see ``reconstruction_loss`` below).
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    self.num_features = self.dataset.num_features
    self.reconstruction_loss = None
def main():
    """Train Node2Vec embeddings on Cora, score them with the built-in
    classifier test, and plot a 2-D t-SNE projection."""
    name = 'Cora'
    root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = Planetoid(root, name)
    data = dataset[0]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Node2Vec(
        data.edge_index,
        embedding_dim=128,
        walk_length=20,
        context_size=10,
        walks_per_node=10,
        num_negative_samples=1,
        p=1,
        q=1,
        sparse=True,
    ).to(device)

    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

    def train():
        # One pass over the random-walk loader; returns the mean loss.
        model.train()
        running = 0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            running += loss.item()
        return running / len(loader)

    @torch.no_grad()
    def test():
        # Fit on train-mask embeddings, score on test-mask embeddings.
        model.eval()
        z = model()
        return model.test(z[data.train_mask], data.y[data.train_mask],
                          z[data.test_mask], data.y[data.test_mask],
                          max_iter=150)

    for epoch in range(1, 101):
        loss = train()
        acc = test()
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Acc: {acc:.4f}')

    @torch.no_grad()
    def plot_points(colors):
        model.eval()
        z = model(torch.arange(data.num_nodes, device=device))
        z = TSNE(n_components=2).fit_transform(z.cpu().numpy())
        y = data.y.cpu().numpy()

        plt.figure(figsize=(8, 8))
        for i in range(dataset.num_classes):
            plt.scatter(z[y == i, 0], z[y == i, 1], s=20, color=colors[i])
        plt.axis('off')
        plt.show()

    palette = [
        '#ffc0cb', '#bada55', '#008080', '#420420', '#7fe5f0', '#065535',
        '#ffd700'
    ]
    plot_points(palette)
def test_lightning_node_data(strategy, loader):
    """End-to-end check of ``LightningNodeData`` across training strategies
    (single device vs. ddp_spawn) and loader types ('full' vs. neighbor),
    including that the underlying data is shared — not copied — across
    workers/devices."""
    import pytorch_lightning as pl

    # Fresh random root so parallel test runs do not collide on disk.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, name='Cora')
    data = dataset[0]
    data_repr = ('Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], '
                 'train_mask=[2708], val_mask=[2708], test_mask=[2708])')
    shutil.rmtree(root)

    model = LinearNodeModule(dataset.num_features, dataset.num_classes)

    # Full-batch training always runs on a single device; sampled loaders
    # may fan out over all available GPUs.
    if strategy is None or loader == 'full':
        gpus = 1
    else:
        gpus = torch.cuda.device_count()

    if strategy == 'ddp_spawn' and loader == 'full':
        data = data.cuda()  # This is necessary to test sharing of data.

    if strategy == 'ddp_spawn':
        strategy = pl.plugins.DDPSpawnPlugin(find_unused_parameters=False)

    # 'full' = one batch per epoch and in-process loading; otherwise sample
    # mini-batches of 32 with worker processes.
    batch_size = 1 if loader == 'full' else 32
    num_workers = 0 if loader == 'full' else 3

    kwargs, kwargs_repr = {}, ''
    if loader == 'neighbor':
        kwargs['num_neighbors'] = [5]
        kwargs_repr += 'num_neighbors=[5], '

    trainer = pl.Trainer(strategy=strategy, gpus=gpus, max_epochs=5,
                         log_every_n_steps=1)
    datamodule = LightningNodeData(data, loader=loader,
                                   batch_size=batch_size,
                                   num_workers=num_workers, **kwargs)
    old_x = data.x.clone().cpu()
    # The repr must reflect every forwarded option, including the derived
    # pin_memory / persistent_workers flags.
    assert str(datamodule) == (f'LightningNodeData(data={data_repr}, '
                               f'loader={loader}, batch_size={batch_size}, '
                               f'num_workers={num_workers}, {kwargs_repr}'
                               f'pin_memory={loader != "full"}, '
                               f'persistent_workers={loader != "full"})')
    trainer.fit(model, datamodule)
    new_x = data.x.cpu()
    # NOTE(review): ``LinearNodeModule`` apparently mutates ``data.x``
    # in-place on every step — the arithmetic below counts the expected
    # number of steps so the final assert proves the datamodule shared (not
    # copied) the tensor. Confirm against LinearNodeModule's implementation.
    if loader == 'full':
        offset = 5 + 5 + 1  # `train_steps` + `val_steps` + `sanity`
    else:
        offset = 0
    offset += gpus * 2  # `sanity`
    offset += 5 * gpus * math.ceil(140 / (gpus * batch_size))  # `train`
    offset += 5 * gpus * math.ceil(500 / (gpus * batch_size))  # `val`
    assert torch.all(new_x > (old_x + offset - 4))  # Ensure shared data.
    assert trainer._data_connector._val_dataloader_source.is_defined()
    assert trainer._data_connector._test_dataloader_source.is_defined()
def main():
    """Plot the degree distribution of the Pubmed citation graph."""
    name = 'Pubmed'
    data = Planetoid(root='/tmp/' + name, name=name)[0]
    G = nx.Graph()
    # Build the graph from the transposed (num_edges, 2) edge array.
    G.add_edges_from(data.edge_index.numpy().T)
    draw_degree_distribution(G)
def exp(exp_name, seed, style, shared):
    """Run FOLDS-fold cross-validation of a GIN model on Cora.

    Per-epoch metrics are logged to ``<exp_name>.log``; the model with the
    best validation accuracy is checkpointed to ``<exp_name>.dth`` and
    reloaded for the fold's test evaluation.

    NOTE(review): the ``style`` and ``shared`` parameters are unused in this
    body — confirm whether they are consumed elsewhere or can be dropped.
    """
    torch.manual_seed(seed)
    dataset = 'Cora'
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                        'data', dataset)
    dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    fold = 0
    accuracies = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    with open('{}.log'.format(exp_name), 'w') as flog:
        for tr_mask, vl_mask, ts_mask in gen_folds(data.x.shape[0], FOLDS,
                                                   FOLDS_SEED):
            fold += 1
            print("FOLD:", fold)
            flog.write("fold #{}\n".format(fold))
            # Overwrite the public Planetoid split with this fold's masks.
            data.train_mask = tr_mask
            data.val_mask = vl_mask
            data.test_mask = ts_mask
            print('Train: {}'.format(torch.sum(data.train_mask)))
            print('Validation: {}'.format(torch.sum(data.val_mask)))
            print('Test: {}'.format(torch.sum(data.test_mask)))
            data = data.to(device)
            #model = GINNet(dataset).to(device)
            model = GIN(dataset, 2, 64, seed).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001,
                                         weight_decay=0.0001)
            best_acc = 0
            count = 0  # epochs since the last validation improvement
            for epoch in range(1, EPOCH):
                train(model, data, optimizer)
                train_accs = validate(model, data)
                log = 'Epoch: {:03d}, Train: {:.4f}, Validation: {:.4f}'
                print(log.format(epoch, *train_accs))
                log += '\n'
                flog.write(log.format(epoch, *train_accs))
                if train_accs[1] > best_acc:
                    # New best validation accuracy: checkpoint and reset
                    # the early-stopping counter.
                    best_acc = train_accs[1]
                    torch.save(model.state_dict(), "{}.dth".format(exp_name))
                    print("Saving model at iteration {}".format(epoch))
                    count = 0
                else:
                    count += 1
                if count == 200:
                    # Early stopping: 200 epochs without improvement.
                    break
            # Evaluate the best checkpoint on this fold's test split.
            model.load_state_dict(torch.load("{}.dth".format(exp_name)))
            accs = test(model, data)
            print('Test Accuracy: {}'.format(accs[1]))
            flog.write('Test Accuracy: {}\n'.format(accs[1]))
            accuracies.append(accs[1])
            flog.write("----------\n")
        flog.write("Avg Test Accuracy: {}\tVariance: {}\n".format(
            np.mean(accuracies), np.var(accuracies)))
def main():
    """Parse CLI args, build a random-split Planetoid dataset, and train."""
    args = arg_parse()
    root = Path("../data")
    # NOTE(review): any other dataset name leaves ``dataset``/``task``
    # unbound and raises NameError below — confirm that is acceptable.
    if args.dataset == 'cora':
        dataset = Planetoid(root=root / 'Cora', name='Cora',
                            split='random', num_train_per_class=77)
        task = 'node'
    elif args.dataset == 'citeseer':
        dataset = Planetoid(root=root / 'CiteSeer', name='CiteSeer',
                            split='random', num_train_per_class=111)
        task = 'node'
    return train(dataset, task, args)
def main():
    """Link-prediction entry point: load Cora, split it into message-passing
    and supervision edges, then train ``Net`` with SGD and a cosine-annealed
    learning rate."""
    args = arg_parse()

    pyg_dataset = Planetoid('./cora', 'Cora', transform=T.TargetIndegree())
    # the input that we assume users have
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, tensor_backend=True)
    if args.multigraph:
        # Replicate the single Cora graph ten times to exercise the
        # multi-graph (inductive) code path.
        graphs = [copy.deepcopy(graphs[0]) for _ in range(10)]

    dataset = GraphDataset(graphs,
                           task='link_pred',
                           edge_message_ratio=args.edge_message_ratio,
                           edge_train_mode=edge_train_mode)
    print('Initial dataset: {}'.format(dataset))

    # split dataset: transductive on the single graph, inductive
    # (whole-graph) when multigraph is requested.
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
        transductive=not args.multigraph, split_ratio=[0.85, 0.05, 0.1])

    print('after split')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
        datasets['train'][0].num_nodes, datasets['train'][0].num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
        datasets['val'][0].num_nodes, datasets['val'][0].num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
        datasets['test'][0].num_nodes, datasets['test'][0].num_edges))

    # node feature dimension
    input_dim = datasets['train'].num_node_features
    # link prediction needs 2 classes (0, 1)
    num_classes = datasets['train'].num_edge_labels

    model = Net(input_dim, num_classes, args).to(args.device)
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs)

    follow_batch = []  # e.g., follow_batch = ['edge_index']
    dataloaders = {split: DataLoader(
        ds, collate_fn=Batch.collate(follow_batch),
        batch_size=args.batch_size, shuffle=(split == 'train'))
        for split, ds in datasets.items()}
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args, scheduler=scheduler)
def test_heterogeneous_neighbor_loader_on_cora(directed):
    """NeighborLoader on Cora wrapped as a single-node-type HeteroData.

    Verifies that (1) for undirected sampling, the sampled subgraph matches
    ``k_hop_subgraph`` around the seed nodes, and (2) a ``to_hetero``-
    converted GNN reproduces the plain homogeneous model's output on the
    seed nodes.
    """
    # Unique temp root so concurrent test runs do not clash on disk.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    # Re-package the homogeneous graph as HeteroData with one node type
    # ('paper') and one edge type ('paper', 'paper').
    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)  # three seed nodes

    # num_neighbors=[-1, -1]: keep ALL neighbors over two hops, so one batch
    # covers all seeds exactly once.
    loader = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        # Undirected sampling must recover exactly the 2-hop subgraph.
        n_id, _, _, e_mask = k_hop_subgraph(split_idx, num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)

        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        # Minimal two-layer edge-weighted GraphConv network.
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    # Homogeneous forward on the full graph vs. heterogeneous forward on the
    # sampled batch must agree on the seed nodes.
    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    try:
        shutil.rmtree(root)
    except PermissionError:
        pass  # best-effort cleanup (e.g. file still locked on some systems)
def __init__(self):
    """Initialize the CiteSeer dataset, downloading it first if absent."""
    name = "CiteSeer"
    root = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data",
                    name)
    if not osp.exists(root):
        # Materialize the dataset on disk before the parent constructor runs.
        Planetoid(root, name, transform=T.TargetIndegree())
    super(CiteSeerDataset, self).__init__(root, name,
                                          transform=T.TargetIndegree())
def test_cora():
    """NeighborSampler smoke test on Cora.

    With full neighborhoods (size=1.0) and self-loops, both the bipartite
    (DataFlow) mode and the subgraph mode must reproduce the full-graph
    forward pass on every sampled node.
    """
    # Unique temp root so concurrent test runs do not clash on disk.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]

    class Net(torch.nn.Module):
        # Three-layer GraphSAGE supporting both DataFlow (bipartite block)
        # inputs and a plain edge_index.
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = SAGEConv(dataset.num_features, 16)
            self.conv2 = SAGEConv(16, 16)
            self.conv3 = SAGEConv(16, dataset.num_classes)

        def forward_data_flow(self, x, data_flow):
            # Apply each conv to its corresponding bipartite block.
            block = data_flow[0]
            x = F.relu(self.conv1(x, block.edge_index, size=block.size))
            block = data_flow[1]
            x = F.relu(self.conv2(x, block.edge_index, size=block.size))
            block = data_flow[2]
            x = self.conv3(x, block.edge_index, size=block.size)
            return x

        def forward(self, x, edge_index):
            x = F.relu(self.conv1(x, edge_index))
            x = F.relu(self.conv2(x, edge_index))
            return self.conv3(x, edge_index)

    model = Net()
    # Reference output: full-graph forward pass.
    out_all = model(data.x, data.edge_index)

    # Bipartite (DataFlow) mode with all neighbors kept must match exactly.
    loader = NeighborSampler(data, size=1.0, num_hops=3, batch_size=64,
                             shuffle=False, drop_last=False, bipartite=True,
                             add_self_loops=True)

    for data_flow in loader(data.train_mask):
        out = model.forward_data_flow(data.x[data_flow[0].n_id], data_flow)
        assert torch.allclose(out_all[data_flow.n_id], out)

    # Subgraph mode: run the plain forward on the sampled subgraph and
    # compare on the mini-batch nodes only.
    loader = NeighborSampler(data, size=1.0, num_hops=3, batch_size=64,
                             shuffle=False, drop_last=False, bipartite=False)

    for subdata in loader(data.train_mask):
        out = model(data.x[subdata.n_id], subdata.edge_index)[subdata.sub_b_id]
        assert torch.allclose(out_all[subdata.b_id], out)

    shutil.rmtree(root)
def get_planetoid_dataset(name, normalize_features=False, transform=None,
                          split="public"):
    """Load a Planetoid dataset with an optional hand-built 'complete' split.

    :param name: dataset name ('Cora', 'CiteSeer', 'PubMed')
    :param normalize_features: if True, prepend ``T.NormalizeFeatures``
    :param transform: optional extra transform, composed after normalization
    :param split: 'complete' for an all-but-1500-train split, otherwise
        forwarded to ``Planetoid`` (e.g. 'public', 'full', 'random')
    :return: the dataset with ``dataset.transform`` configured
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    if split == 'complete':
        dataset = Planetoid(path, name)
        # Bug fix: the masks were mutated through repeated ``dataset[0]``
        # calls; each access may construct a fresh Data object, silently
        # dropping the mutations. Bind the graph once and mutate that.
        # NOTE(review): persistence of these in-place edits still relies on
        # ``dataset[0]`` sharing tensor storage with the dataset — confirm
        # against the pinned torch_geometric version.
        data = dataset[0]
        data.train_mask.fill_(False)
        data.train_mask[:data.num_nodes - 1000] = 1
        data.val_mask.fill_(False)
        data.val_mask[data.num_nodes - 1000:data.num_nodes - 500] = 1
        data.test_mask.fill_(False)
        data.test_mask[data.num_nodes - 500:] = 1
    else:
        dataset = Planetoid(path, name, split=split)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset
def load_dataset(dataset_folder, dataset_name):
    """Load a Planetoid citation dataset as torch_geometric ``Data``.

    Bug fix: the transform was passed as the third positional argument; in
    current torch_geometric ``Planetoid``'s third positional parameter is
    ``split``, so the transform must be given by keyword (works on older
    versions too, where the parameter is also named ``transform``).

    :param dataset_folder: directory under which the dataset is stored
    :param dataset_name: one of "Cora", "CiteSeer", "PubMed"
    :return: the dataset with row-normalized node features
    """
    path = os.path.join(os.path.dirname(dataset_folder), dataset_name)
    dataset = Planetoid(path, dataset_name,
                        transform=T.NormalizeFeatures())
    return dataset