def test_index_to_mask():
    """index_to_mask converts an index tensor into a boolean mask."""
    index = torch.tensor([1, 3, 5])

    # Without an explicit size, the mask length is inferred from the indices.
    out = index_to_mask(index)
    assert out.tolist() == [False, True, False, True, False, True]

    # An explicit size pads the mask with trailing False entries.
    out = index_to_mask(index, size=7)
    assert out.tolist() == [False, True, False, True, False, True, False]
def random_planetoid_splits(data, num_classes):
    """Assign new random Planetoid-style splits to ``data``:

    * 20 * num_classes labels for training
    * 500 labels for validation
    * 1000 labels for testing
    """
    def shuffled_class_indices(c):
        # All node indices of class ``c``, in random order.
        idx = (data.y == c).nonzero().view(-1)
        return idx[torch.randperm(idx.size(0))]

    per_class = [shuffled_class_indices(c) for c in range(num_classes)]

    # The first 20 (shuffled) nodes of every class form the training set.
    train_index = torch.cat([idx[:20] for idx in per_class], dim=0)

    # Everything else is pooled, reshuffled, and sliced into val/test.
    rest_index = torch.cat([idx[20:] for idx in per_class], dim=0)
    rest_index = rest_index[torch.randperm(rest_index.size(0))]

    data.train_mask = index_to_mask(train_index, size=data.num_nodes)
    data.val_mask = index_to_mask(rest_index[:500], size=data.num_nodes)
    data.test_mask = index_to_mask(rest_index[500:1500], size=data.num_nodes)
    return data
def test_bipartite_subgraph():
    """bipartite_subgraph accepts index-tensor, mask and list subsets alike."""
    edge_index = torch.tensor([
        [0, 5, 2, 3, 3, 4, 4, 3, 5, 5, 6],
        [0, 0, 3, 2, 0, 0, 2, 1, 2, 3, 1],
    ])
    edge_attr = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

    idx = (torch.tensor([2, 3, 5], dtype=torch.long),
           torch.tensor([2, 3], dtype=torch.long))
    mask = (index_to_mask(idx[0], 7), index_to_mask(idx[1], 4))
    indices = (idx[0].tolist(), idx[1].tolist())

    for subset in (idx, mask, indices):
        # Without relabeling, the original node ids are kept.
        edge_sub, attr_sub, edge_mask = bipartite_subgraph(
            subset, edge_index, edge_attr, return_edge_mask=True)
        assert edge_sub.tolist() == [[2, 3, 5, 5], [3, 2, 2, 3]]
        assert attr_sub.tolist() == [3, 4, 9, 10]
        assert edge_mask.tolist() == [0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0]

        # With relabeling, kept nodes are remapped to a contiguous range.
        edge_sub, attr_sub = bipartite_subgraph(subset, edge_index, edge_attr,
                                                relabel_nodes=True)
        assert edge_sub.tolist() == [[0, 1, 2, 2], [1, 0, 0, 1]]
        assert attr_sub.tolist() == [3, 4, 9, 10]
def test_subgraph():
    """subgraph accepts index-tensor, mask and list subsets alike."""
    edge_index = torch.tensor([
        [0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6],
        [1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5],
    ])
    edge_attr = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

    idx = torch.tensor([3, 4, 5], dtype=torch.long)
    mask = index_to_mask(idx, 7)
    indices = idx.tolist()

    for subset in (idx, mask, indices):
        # Without relabeling, the original node ids are kept.
        edge_sub, attr_sub, edge_mask = subgraph(subset, edge_index, edge_attr,
                                                 return_edge_mask=True)
        assert edge_sub.tolist() == [[3, 4, 4, 5], [4, 3, 5, 4]]
        assert attr_sub.tolist() == [7, 8, 9, 10]
        assert edge_mask.tolist() == [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]

        # With relabeling, kept nodes are remapped to a contiguous range.
        edge_sub, attr_sub = subgraph(subset, edge_index, edge_attr,
                                      relabel_nodes=True)
        assert edge_sub.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
        assert attr_sub.tolist() == [7, 8, 9, 10]
def load_ogb(name, dataset_dir):
    r"""Load an OGB dataset object.

    Args:
        name (string): dataset name, prefixed by :obj:`"ogbn"` (node-level),
            :obj:`"ogbg"` (graph-level) or :obj:`"ogbl"` (link-level)
        dataset_dir (string): data directory

    Returns:
        PyG dataset object

    Raises:
        ValueError: If :obj:`name` does not carry a known OGB prefix.
    """
    from ogb.graphproppred import PygGraphPropPredDataset
    from ogb.linkproppred import PygLinkPropPredDataset
    from ogb.nodeproppred import PygNodePropPredDataset

    if name.startswith('ogbn'):
        # Node prediction: attach boolean train/val/test node masks and make
        # the graph undirected.
        dataset = PygNodePropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = ['train_mask', 'val_mask', 'test_mask']
        for i, key in enumerate(splits.keys()):
            mask = index_to_mask(splits[key], size=dataset.data.y.shape[0])
            set_dataset_attr(dataset, split_names[i], mask, len(mask))
        edge_index = to_undirected(dataset.data.edge_index)
        set_dataset_attr(dataset, 'edge_index', edge_index,
                         edge_index.shape[1])

    elif name.startswith('ogbg'):
        # Graph prediction: store the graph indices of each split.
        dataset = PygGraphPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = [
            'train_graph_index', 'val_graph_index', 'test_graph_index'
        ]
        for i, key in enumerate(splits.keys()):
            graph_index = splits[key]  # avoid shadowing builtin ``id``
            set_dataset_attr(dataset, split_names[i], graph_index,
                             len(graph_index))

    elif name.startswith('ogbl'):
        # Link prediction: build positive/negative edge index pairs plus
        # binary link labels for each split.
        dataset = PygLinkPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_edge_split()

        pos_edge = splits['train']['edge'].T
        if cfg.dataset.resample_negative:
            # Negatives are re-sampled on the fly by the transform.
            set_dataset_attr(dataset, 'train_pos_edge_index', pos_edge,
                             pos_edge.shape[1])
            dataset.transform = neg_sampling_transform
        else:
            neg_edge = negative_sampling(edge_index=pos_edge,
                                         num_nodes=dataset.data.num_nodes,
                                         num_neg_samples=pos_edge.shape[1])
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = create_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, 'train_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, 'train_edge_label', label, len(label))

        # Validation/test negatives are pre-computed by OGB.
        for split, prefix in (('valid', 'val'), ('test', 'test')):
            pos_edge = splits[split]['edge'].T
            neg_edge = splits[split]['edge_neg'].T
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = create_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, f'{prefix}_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, f'{prefix}_edge_label', label,
                             len(label))

    else:
        # BUG FIX: the original message was an unformatted '{}' placeholder,
        # so the offending name was never reported.
        raise ValueError(f"OGB dataset: '{name}' does not exist")

    return dataset
        # NOTE(review): the two lines below are the tail of a method whose
        # beginning lies outside this chunk (presumably the end of a
        # ``forward`` loop over ``self.convs``) — confirm indentation
        # against the full file.
        x = norm(conv(x, edge_index)).relu()
        return self.convs[-1](x, edge_index)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = dataset[0].to(device)
data.y = data.y.view(-1)  # flatten targets to a 1-D class-index vector

model = UniMP(dataset.num_features, dataset.num_classes, hidden_channels=64,
              num_layers=3, heads=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001,
                             weight_decay=0.0005)

# Convert the dataset's split indices into boolean node masks.
split_idx = dataset.get_idx_split()
train_mask = index_to_mask(split_idx['train'], size=data.num_nodes)
val_mask = index_to_mask(split_idx['valid'], size=data.num_nodes)
test_mask = index_to_mask(split_idx['test'], size=data.num_nodes)


def train(label_rate=0.65):  # How many labels to use for propagation.
    """Run one optimization step: propagate a random ``label_rate`` subset
    of the training labels through the model and supervise on the rest."""
    model.train()

    # ``propagation_mask`` selects labels fed into the model as input;
    # the XOR leaves the remaining training nodes as supervision targets.
    propagation_mask = MaskLabel.ratio_mask(train_mask, ratio=label_rate)
    supervision_mask = train_mask ^ propagation_mask

    optimizer.zero_grad()
    out = model(data.x, data.y, data.edge_index, propagation_mask)
    loss = F.cross_entropy(out[supervision_mask], data.y[supervision_mask])
    loss.backward()
    optimizer.step()
        # NOTE(review): the three lines below are the tail of a method whose
        # beginning lies outside this chunk — confirm indentation against
        # the full file.
        x = self.norm(x).relu()
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.lin2(x)


from ogb.nodeproppred import Evaluator, PygNodePropPredDataset  # noqa

transform = T.AddSelfLoops()
root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'products')
dataset = PygNodePropPredDataset('ogbn-products', root, transform=transform)
evaluator = Evaluator(name='ogbn-products')

# Attach boolean node masks derived from the OGB split indices.
data = dataset[0]
split_idx = dataset.get_idx_split()
for split in ['train', 'valid', 'test']:
    data[f'{split}_mask'] = index_to_mask(split_idx[split], data.y.shape[0])

train_loader = RandomNodeSampler(data, num_parts=10, shuffle=True,
                                 num_workers=5)
# Increase the num_parts of the test loader if you cannot fit
# the full batch graph into your GPU:
test_loader = RandomNodeSampler(data, num_parts=1, num_workers=5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): the ``RevGNN(...)`` call continues beyond this chunk; the
# argument list is intentionally left open here.
model = RevGNN(
    in_channels=dataset.num_features,
    hidden_channels=160,
    out_channels=dataset.num_classes,
    num_layers=7,  # You can try 1000 layers for fun