def __init__(self, dataset_name, data_cpu=False, fan_out=[10, 25],
             device=th.device('cpu'), batch_size=1000, num_workers=4):
    super().__init__()
    if dataset_name == 'reddit':
        g, n_classes = load_reddit()
    elif dataset_name == 'ogbn-products':
        g, n_classes = load_ogb('ogbn-products')
    else:
        raise ValueError('unknown dataset')

    train_nid = th.nonzero(g.ndata['train_mask'], as_tuple=True)[0]
    val_nid = th.nonzero(g.ndata['val_mask'], as_tuple=True)[0]
    # Everything that is neither train nor validation is treated as test.
    test_nid = th.nonzero(
        ~(g.ndata['train_mask'] | g.ndata['val_mask']), as_tuple=True)[0]

    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(_) for _ in fan_out])

    dataloader_device = th.device('cpu')
    if not data_cpu:
        # Move the seed nodes and the graph to the target device so that
        # sampling runs there as well.
        train_nid = train_nid.to(device)
        val_nid = val_nid.to(device)
        test_nid = test_nid.to(device)
        # Keep only the CSC format; it is the one neighbor sampling needs.
        g = g.formats(['csc'])
        g = g.to(device)
        dataloader_device = device

    self.g = g
    self.train_nid, self.val_nid, self.test_nid = train_nid, val_nid, test_nid
    self.sampler = sampler
    self.device = dataloader_device
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.in_feats = g.ndata['features'].shape[1]
    self.n_classes = n_classes
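# --- Usage sketch (an assumption, not part of the original file): how the
# fields prepared above typically feed DGL's NodeDataLoader. `self` is the
# surrounding data-module instance; the method name is hypothetical.
def train_dataloader_sketch(self):
    return dgl.dataloading.NodeDataLoader(
        self.g, self.train_nid, self.sampler,
        device=self.device,            # where sampled blocks are created
        batch_size=self.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=self.num_workers)
# Iterating the returned loader yields (input_nodes, output_nodes, blocks)
# triples, one per mini-batch of seed nodes.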
def main(args):
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    # load dataset
    if args.dataset == 'reddit':
        g, n_classes = load_reddit(self_loop=False)
    elif args.dataset == 'ogbn-products':
        g, n_classes = load_ogb('ogbn-products')
    else:
        raise Exception('unknown dataset')

    train_nid = g.ndata.pop('train_mask').nonzero().squeeze()
    val_nid = g.ndata.pop('val_mask').nonzero().squeeze()
    test_nid = g.ndata.pop('test_mask').nonzero().squeeze()
    nfeat = g.ndata.pop('features')
    labels = g.ndata.pop('labels')

    # Create csr/coo/csc formats before launching training processes with multi-GPU.
    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
    g.create_formats_()

    # This is to avoid contention overhead on machines with many cores.
    # Change it to a proper number on your machine, especially for multi-GPU training.
    os.environ['OMP_NUM_THREADS'] = str(mp.cpu_count() // 2 // n_gpus)

    if n_gpus > 1:
        # Copy the graph to shared memory explicitly before pinning.
        # In other cases, we can just rely on fork's copy-on-write.
        # TODO: the original graph g is not freed.
        if args.graph_device == 'uva':
            g = g.shared_memory('g')
        if args.data_device == 'uva':
            nfeat = nfeat.share_memory_()
            labels = labels.share_memory_()

    # Pack data
    data = train_nid, val_nid, test_nid, n_classes, g, nfeat, labels

    if devices[0] == -1:
        assert args.graph_device == 'cpu', \
            f"Must have GPUs to enable {args.graph_device} sampling."
        assert args.data_device == 'cpu', \
            f"Must have GPUs to enable {args.data_device} feature storage."
        run(0, 0, args, ['cpu'], data)
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, data)
    else:
        procs = []
        for proc_id in range(n_gpus):
            p = mp.Process(target=run,
                           args=(proc_id, n_gpus, args, devices, data))
            p.start()
            procs.append(p)
        for p in procs:
            p.join()
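# --- Minimal outline of the per-process worker that main() launches above.
# The real `run` lives elsewhere in the file; this sketch (hypothetical name
# `run_sketch`) only shows the assumed DDP setup with the NCCL backend.
def run_sketch(proc_id, n_gpus, args, devices, data):
    dev_id = devices[proc_id]
    if n_gpus > 1:
        # One process per GPU; rendezvous over TCP on localhost.
        th.distributed.init_process_group(
            backend='nccl',
            init_method='tcp://127.0.0.1:12345',
            world_size=n_gpus,
            rank=proc_id)
    device = th.device('cpu') if dev_id == 'cpu' else th.device('cuda:%d' % dev_id)
    if device.type == 'cuda':
        th.cuda.set_device(device)
    # Unpack the tuple assembled in main().
    train_nid, val_nid, test_nid, n_classes, g, nfeat, labels = data
    # model = SAGE(nfeat.shape[1], args.num_hidden, n_classes, ...).to(device)
    # if n_gpus > 1:
    #     model = th.nn.parallel.DistributedDataParallel(
    #         model, device_ids=[device], output_device=device)
    # ... build the sampling dataloader and run the train/eval loop ...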
def main(args):
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    # load dataset
    if args.dataset == 'reddit':
        g, n_classes = load_reddit(self_loop=False)
    elif args.dataset == 'ogbn-products':
        g, n_classes = load_ogb('ogbn-products')
    else:
        raise Exception('unknown dataset')

    train_nid = g.ndata.pop('train_mask').nonzero().squeeze()
    val_nid = g.ndata.pop('val_mask').nonzero().squeeze()
    test_nid = g.ndata.pop('test_mask').nonzero().squeeze()
    nfeat = g.ndata.pop('features')
    labels = g.ndata.pop('labels')

    # Create csr/coo/csc formats before launching training processes with multi-GPU.
    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
    g.create_formats_()

    # This is to avoid contention overhead on machines with many cores.
    # Change it to a proper number on your machine, especially for multi-GPU training.
    os.environ['OMP_NUM_THREADS'] = str(mp.cpu_count() // 2 // n_gpus)

    # Pack data
    data = train_nid, val_nid, test_nid, n_classes, g, nfeat, labels

    if devices[0] == -1:
        assert args.graph_device == 'cpu', \
            f"Must have GPUs to enable {args.graph_device} sampling."
        assert args.data_device == 'cpu', \
            f"Must have GPUs to enable {args.data_device} feature storage."
        run(0, 0, args, ['cpu'], data)
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, data)
    else:
        mp.spawn(run, args=(n_gpus, args, devices, data), nprocs=n_gpus)
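# --- Note on the spawn-based launch above: torch.multiprocessing's spawn
# invokes the target as run(i, *args) for i in range(nprocs), so `run` receives
# its process rank as an implicit first argument. Minimal illustration with a
# hypothetical `_demo` function (not part of the original script):
def _demo(rank, world_size, msg):
    # Each spawned process gets its own rank; the remaining args are shared.
    print('rank %d/%d: %s' % (rank, world_size, msg))

if __name__ == '__main__':
    mp.spawn(_demo, args=(2, 'hello'), nprocs=2)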
argparser.add_argument(
    '--data-cpu', action='store_true',
    help="By default the script puts all node features and labels "
         "on GPU to save time on data copies. This may be "
         "undesirable if they cannot fit in GPU memory at once. "
         "Setting this flag keeps all node features "
         "in the unified tensor instead.")
args = argparser.parse_args()

if args.gpu >= 0:
    device = th.device('cuda:%d' % args.gpu)
else:
    device = th.device('cpu')

if args.dataset == 'reddit':
    g, n_classes = load_reddit()
elif args.dataset == 'ogbn-products':
    g, n_classes = load_ogb('ogbn-products')
else:
    raise Exception('unknown dataset')

if args.inductive:
    # Inductive split: train/val/test each get their own subgraph.
    train_g, val_g, test_g = inductive_split(g)
    train_nfeat = train_g.ndata.pop('features')
    val_nfeat = val_g.ndata.pop('features')
    test_nfeat = test_g.ndata.pop('features')
    train_labels = train_g.ndata.pop('labels')
    val_labels = val_g.ndata.pop('labels')
    test_labels = test_g.ndata.pop('labels')
else:
    train_g = val_g = test_g = g
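# --- Sketch of how --data-cpu would gate feature placement further down in the
# script (assumption: DGL's dgl.contrib.UnifiedTensor for zero-copy GPU access
# to host-resident features; the exact call site is not shown in this excerpt).
if args.data_cpu:
    # Features stay in host memory; GPU kernels read them on demand over the bus.
    train_nfeat = dgl.contrib.UnifiedTensor(train_nfeat, device=device)
else:
    # Enough GPU memory: copy the features over once up front.
    train_nfeat = train_nfeat.to(device)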
argparser.add_argument('--balance_edges', action='store_true',
                       help='balance the number of edges in each partition.')
argparser.add_argument('--num_trainers_per_machine', type=int, default=1,
                       help='the number of trainers per machine. The trainer ids are '
                            'stored in the node feature \'trainer_id\'.')
argparser.add_argument('--output', type=str, default='data',
                       help='Output path of the partitioned graph.')
args = argparser.parse_args()

start = time.time()
if args.dataset == 'reddit':
    g, _ = load_reddit()
elif args.dataset == 'ogb-product':
    g, _ = load_ogb('ogbn-products')
elif args.dataset == 'ogb-paper100M':
    g, _ = load_ogb('ogbn-papers100M')
else:
    raise ValueError('unknown dataset')
print('load {} takes {:.3f} seconds'.format(args.dataset, time.time() - start))
print('|V|={}, |E|={}'.format(g.number_of_nodes(), g.number_of_edges()))
print('train: {}, valid: {}, test: {}'.format(
    th.sum(g.ndata['train_mask']), th.sum(g.ndata['val_mask']),
    th.sum(g.ndata['test_mask'])))

if args.balance_train:
    balance_ntypes = g.ndata['train_mask']
else:
    balance_ntypes = None
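# --- The script would typically end by handing the flags parsed above to DGL's
# partitioner. Sketch, assuming `--num_parts` and `--part_method` arguments that
# are defined outside this excerpt:
dgl.distributed.partition_graph(
    g, args.dataset, args.num_parts, args.output,
    part_method=args.part_method,
    balance_ntypes=balance_ntypes,          # set from --balance_train above
    balance_edges=args.balance_edges,
    num_trainers_per_machine=args.num_trainers_per_machine)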