import os.path as osp

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG, Reddit


def get_dataset(name, root, use_sparse_tensor):
    path = osp.join(osp.dirname(osp.realpath(__file__)), root, name)
    # Optionally convert `edge_index` into a `SparseTensor` layout:
    transform = T.ToSparseTensor() if use_sparse_tensor else None

    if name == 'ogbn-mag':
        # OGB-MAG is heterogeneous; make it undirected (merging reverse
        # edge types) before any sparse-tensor conversion.
        if transform is None:
            transform = T.ToUndirected(merge=True)
        else:
            transform = T.Compose([T.ToUndirected(merge=True), transform])
        dataset = OGB_MAG(root=path, preprocess='metapath2vec',
                          transform=transform)
    elif name == 'ogbn-products':
        dataset = PygNodePropPredDataset('ogbn-products', root=path,
                                         transform=transform)
    elif name == 'Reddit':
        dataset = Reddit(root=path, transform=transform)
    else:
        raise ValueError(f"Unknown dataset: '{name}'")

    return dataset[0], dataset.num_classes
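# Usage sketch (not from the original file; the relative `root` path and
# the dataset choice are assumptions):
if __name__ == '__main__':
    data, num_classes = get_dataset('Reddit', '../data',
                                    use_sparse_tensor=False)
    print(data)
    print(f'num_classes: {num_classes}')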
from typing import Optional


def setup(self, stage: Optional[str] = None):
    # Load the preprocessed heterogeneous graph in every process:
    self.data = OGB_MAG(self.root, preprocess='metapath2vec',
                        transform=self.transform)[0]
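# A possible loader hook consuming `self.data` (a sketch; the fan-out,
# batch size, and use of the 'paper' train mask are assumptions, not from
# the original file):
def train_dataloader(self):
    return NeighborLoader(
        self.data,
        num_neighbors=[10, 10],  # sample two hops, 10 neighbors each
        input_nodes=('paper', self.data['paper'].train_mask),
        batch_size=1024,
        shuffle=True,
    )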
import argparse
import os.path as osp

import torch
from tqdm import tqdm

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import HGTLoader, NeighborLoader
from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero

parser = argparse.ArgumentParser()
parser.add_argument('--use_hgt_loader', action='store_true')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB')
transform = T.ToUndirected(merge=True)
dataset = OGB_MAG(path, preprocess='metapath2vec', transform=transform)

# Already send node features/labels to GPU for faster access during sampling:
data = dataset[0].to(device, 'x', 'y')

train_input_nodes = ('paper', data['paper'].train_mask)
val_input_nodes = ('paper', data['paper'].val_mask)
kwargs = {'batch_size': 1024, 'num_workers': 6, 'persistent_workers': True}

if not args.use_hgt_loader:
    train_loader = NeighborLoader(data, num_neighbors=[10] * 2, shuffle=True,
                                  input_nodes=train_input_nodes, **kwargs)
    val_loader = NeighborLoader(data, num_neighbors=[10] * 2,
                                input_nodes=val_input_nodes, **kwargs)
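else:
    # Sketch of the HGT-based branch implied by `--use_hgt_loader` (the
    # per-type sample sizes below are assumptions, not from the original
    # file). HGTLoader samples a fixed number of nodes per node type and
    # layer, rather than a fixed number of neighbors per edge.
    train_loader = HGTLoader(data, num_samples=[1024] * 4, shuffle=True,
                             input_nodes=train_input_nodes, **kwargs)
    val_loader = HGTLoader(data, num_samples=[1024] * 4,
                           input_nodes=val_input_nodes, **kwargs)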
def prepare_data(self):
    # Trigger download and metapath2vec preprocessing once; `setup` later
    # loads the processed data in each process.
    OGB_MAG(self.root, preprocess='metapath2vec')
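# Sanity-check sketch for the two hooks above (assumes a hypothetical
# `MAGDataModule` that defines them, with `self.root`/`self.transform`
# set in `__init__`; not from the original file):
if __name__ == '__main__':
    dm = MAGDataModule(root='../../data/OGB')
    dm.prepare_data()  # download + metapath2vec preprocessing, runs once
    dm.setup()         # loads the HeteroData object into `dm.data`
    print(dm.data)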
import argparse
import os.path as osp
from timeit import default_timer

import tqdm

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import NeighborLoader


def run(args: argparse.Namespace) -> None:
    for dataset_name in args.datasets:
        print(f'Dataset: {dataset_name}')
        root = osp.join(args.root, dataset_name)

        if dataset_name == 'mag':
            transform = T.ToUndirected(merge=True)
            dataset = OGB_MAG(root=root, transform=transform)
            train_idx = ('paper', dataset[0]['paper'].train_mask)
            eval_idx = ('paper', None)  # evaluate over all 'paper' nodes
            neighbor_sizes = args.hetero_neighbor_sizes
        else:
            dataset = PygNodePropPredDataset(f'ogbn-{dataset_name}', root)
            split_idx = dataset.get_idx_split()
            train_idx = split_idx['train']
            eval_idx = None  # evaluate over all nodes
            neighbor_sizes = args.homo_neighbor_sizes

        data = dataset[0].to(args.device)

        for num_neighbors in neighbor_sizes:
            print(f'Training sampling with {num_neighbors} neighbors')
            for batch_size in args.batch_sizes:
                train_loader = NeighborLoader(
                    data,
                    num_neighbors=num_neighbors,
                    input_nodes=train_idx,
                    batch_size=batch_size,
                    shuffle=True,
                    num_workers=args.num_workers,
                )
                runtimes = []
                num_iterations = 0
                for _ in range(args.runs):
                    start = default_timer()
                    for batch in tqdm.tqdm(train_loader):
                        num_iterations += 1
                    stop = default_timer()
                    runtimes.append(round(stop - start, 3))
                average_time = round(sum(runtimes) / args.runs, 3)
                print(f'batch size={batch_size}, '
                      f'iterations={num_iterations}, '
                      f'runtimes={runtimes}, '
                      f'average runtime={average_time}')

        # Full-neighborhood sampling, as used for layer-wise evaluation:
        print('Evaluation sampling with all neighbors')
        for batch_size in args.eval_batch_sizes:
            subgraph_loader = NeighborLoader(
                data,
                num_neighbors=[-1],
                input_nodes=eval_idx,
                batch_size=batch_size,
                shuffle=False,
                num_workers=args.num_workers,
            )
            runtimes = []
            num_iterations = 0
            for _ in range(args.runs):
                start = default_timer()
                for batch in tqdm.tqdm(subgraph_loader):
                    num_iterations += 1
                stop = default_timer()
                runtimes.append(round(stop - start, 3))
            average_time = round(sum(runtimes) / args.runs, 3)
            print(f'batch size={batch_size}, '
                  f'iterations={num_iterations}, '
                  f'runtimes={runtimes}, '
                  f'average runtime={average_time}')
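# Entry-point sketch (assumed, not from the original file): the argument
# names mirror the attributes `run` reads; all default values below are
# illustrative, and the nested neighbor-size lists are supplied as
# defaults only (CLI parsing of nested lists is omitted in this sketch).
if __name__ == '__main__':
    parser = argparse.ArgumentParser('NeighborLoader sampling benchmark')
    parser.add_argument('--datasets', nargs='+', default=['mag', 'products'])
    parser.add_argument('--root', default='../../data')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--runs', type=int, default=3)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_sizes', nargs='+', type=int,
                        default=[512, 1024])
    parser.add_argument('--eval_batch_sizes', nargs='+', type=int,
                        default=[2048, 4096])
    parser.set_defaults(homo_neighbor_sizes=[[10, 5], [15, 10, 5]],
                        hetero_neighbor_sizes=[[5], [10]])
    run(parser.parse_args())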