Example #1
import os.path as osp

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG, Reddit


def get_dataset(name, root, use_sparse_tensor):
    path = osp.join(osp.dirname(osp.realpath(__file__)), root, name)
    transform = T.ToSparseTensor() if use_sparse_tensor else None
    if name == 'ogbn-mag':
        # OGB-MAG is heterogeneous: make it undirected (merging reverse
        # edge types) before the optional sparse-tensor conversion:
        if transform is None:
            transform = T.ToUndirected(merge=True)
        else:
            transform = T.Compose([T.ToUndirected(merge=True), transform])
        dataset = OGB_MAG(root=path,
                          preprocess='metapath2vec',
                          transform=transform)
    elif name == 'ogbn-products':
        dataset = PygNodePropPredDataset('ogbn-products',
                                         root=path,
                                         transform=transform)
    elif name == 'Reddit':
        dataset = Reddit(root=path, transform=transform)
    else:
        raise ValueError(f'Unknown dataset: {name}')

    return dataset[0], dataset.num_classes
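
A minimal usage sketch; the dataset name and root passed below are illustrative, not part of the original snippet:

data, num_classes = get_dataset('ogbn-products', '../../data',
                                use_sparse_tensor=False)
print(data)
print(f'num_classes: {num_classes}')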
Example #2
# A LightningDataModule setup() hook: runs on every process and caches
# the single preprocessed graph for the dataloader hooks:
def setup(self, stage: Optional[str] = None):
    self.data = OGB_MAG(self.root, preprocess='metapath2vec',
                        transform=self.transform)[0]
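
For context, this hook lives inside a LightningDataModule. A minimal sketch of the surrounding class, where the class name and constructor fields are assumptions chosen to match the attributes used above:

from typing import Optional

import pytorch_lightning as pl

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG


class MAGDataModule(pl.LightningDataModule):
    def __init__(self, root: str, transform=None):
        super().__init__()
        self.root = root  # consumed by prepare_data()/setup()
        self.transform = transform or T.ToUndirected(merge=True)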
Example #3
import argparse
import os.path as osp

import torch
from tqdm import tqdm

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import HGTLoader, NeighborLoader
from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero

parser = argparse.ArgumentParser()
parser.add_argument('--use_hgt_loader', action='store_true')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB')
transform = T.ToUndirected(merge=True)
dataset = OGB_MAG(path, preprocess='metapath2vec', transform=transform)

# Send node features/labels to the GPU up front for faster access during
# sampling:
data = dataset[0].to(device, 'x', 'y')

train_input_nodes = ('paper', data['paper'].train_mask)
val_input_nodes = ('paper', data['paper'].val_mask)
kwargs = {'batch_size': 1024, 'num_workers': 6, 'persistent_workers': True}

if not args.use_hgt_loader:
    train_loader = NeighborLoader(data,
                                  num_neighbors=[10] * 2,
                                  shuffle=True,
                                  input_nodes=train_input_nodes,
                                  **kwargs)
    # The source snippet is truncated here; the call is completed to
    # mirror the training loader (no shuffling for validation):
    val_loader = NeighborLoader(data,
                                num_neighbors=[10] * 2,
                                input_nodes=val_input_nodes,
                                **kwargs)
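
A minimal sketch of how these loaders are typically consumed for training. The model (e.g. a SAGEConv stack converted with to_hetero, per the imports above) and the optimizer are assumptions and not part of the original snippet:

import torch.nn.functional as F

def train(model, loader, optimizer):
    model.train()
    for batch in tqdm(loader):
        optimizer.zero_grad()
        batch = batch.to(device)
        # Only the first `batch_size` 'paper' nodes are seed nodes; the
        # remainder are sampled neighbors and are excluded from the loss:
        batch_size = batch['paper'].batch_size
        out = model(batch.x_dict, batch.edge_index_dict)
        loss = F.cross_entropy(out['paper'][:batch_size],
                               batch['paper'].y[:batch_size])
        loss.backward()
        optimizer.step()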
Example #4
# The matching LightningDataModule prepare_data() hook: downloads and
# preprocesses the dataset exactly once (on a single process); loading
# it into memory is deferred to setup():
def prepare_data(self):
    OGB_MAG(self.root, preprocess='metapath2vec')
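
A dataloader hook usually completes this pattern; a sketch, assuming self.data was populated in setup() as in Example #2:

from torch_geometric.loader import NeighborLoader

def train_dataloader(self):
    # Sample two-hop neighborhoods around the 'paper' training seeds:
    return NeighborLoader(self.data, num_neighbors=[10, 10],
                          input_nodes=('paper', self.data['paper'].train_mask),
                          batch_size=1024, shuffle=True)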
Example #5
import argparse
import os.path as osp
from timeit import default_timer

import tqdm

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import NeighborLoader


def run(args: argparse.Namespace) -> None:
    for dataset_name in args.datasets:
        print(f"Dataset: {dataset_name}")
        root = osp.join(args.root, dataset_name)

        if dataset_name == 'mag':
            transform = T.ToUndirected(merge=True)
            dataset = OGB_MAG(root=root, transform=transform)
            train_idx = ('paper', dataset[0]['paper'].train_mask)
            eval_idx = ('paper', None)
            neighbor_sizes = args.hetero_neighbor_sizes
        else:
            dataset = PygNodePropPredDataset(f'ogbn-{dataset_name}', root)
            split_idx = dataset.get_idx_split()
            train_idx = split_idx['train']
            eval_idx = None
            neighbor_sizes = args.homo_neighbor_sizes

        data = dataset[0].to(args.device)

        for num_neighbors in neighbor_sizes:
            print(f'Training sampling with num_neighbors={num_neighbors}')
            for batch_size in args.batch_sizes:
                train_loader = NeighborLoader(
                    data,
                    num_neighbors=num_neighbors,
                    input_nodes=train_idx,
                    batch_size=batch_size,
                    shuffle=True,
                    num_workers=args.num_workers,
                )
                runtimes = []
                num_iterations = 0
                for _ in range(args.runs):  # '_' avoids shadowing run()
                    start = default_timer()
                    for batch in tqdm.tqdm(train_loader):
                        num_iterations += 1
                    stop = default_timer()
                    runtimes.append(round(stop - start, 3))
                average_time = round(sum(runtimes) / args.runs, 3)
                print(f'batch size={batch_size}, iterations={num_iterations}, '
                      f'runtimes={runtimes}, average runtime={average_time}')

        print('Evaluation sampling with all neighbors')
        for batch_size in args.eval_batch_sizes:
            subgraph_loader = NeighborLoader(
                data,
                num_neighbors=[-1],
                input_nodes=eval_idx,
                batch_size=batch_size,
                shuffle=False,
                num_workers=args.num_workers,
            )
            runtimes = []
            num_iterations = 0
            for _ in range(args.runs):
                start = default_timer()
                for batch in tqdm.tqdm(subgraph_loader):
                    num_iterations += 1
                stop = default_timer()
                runtimes.append(round(stop - start, 3))
            average_time = round(sum(runtimes) / args.runs, 3)
            print(f'batch size={batch_size}, iterations={num_iterations}, '
                  f'runtimes={runtimes}, average runtime={average_time}')
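
A sketch of the command-line wiring this benchmark expects. The flag names match the attributes read inside run(), but every default below is an illustrative assumption:

import ast

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--datasets', nargs='+', default=['arxiv', 'mag'])
    parser.add_argument('--root', default='../../data')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--runs', type=int, default=3)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_sizes', nargs='+', type=int,
                        default=[1024, 2048])
    parser.add_argument('--eval_batch_sizes', nargs='+', type=int,
                        default=[2048])
    # Each entry is a per-layer fan-out list, e.g. '[[10, 5], [-1]]':
    parser.add_argument('--homo_neighbor_sizes', type=ast.literal_eval,
                        default=[[10, 5], [-1]])
    parser.add_argument('--hetero_neighbor_sizes', type=ast.literal_eval,
                        default=[[10, 5]])
    run(parser.parse_args())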