def __init__(self, dataset_name, data_cpu=False, fan_out=[10, 25],
             device=th.device('cpu'), batch_size=1000, num_workers=4):
    """Load a dataset and set up node splits, sampler and device placement.

    Parameters
    ----------
    dataset_name : 'reddit' or 'ogbn-products'.
    data_cpu : When false, the node ID tensors and the graph (restricted to
        the CSC format) are moved to ``device``; when true everything stays
        where the loader put it and ``self.device`` is the CPU.
    fan_out : Iterable of per-layer fan-outs; each entry is cast to ``int``.
        The default list is only read, never mutated.
    device : Target device used when ``data_cpu`` is false.
    batch_size, num_workers : Stored unchanged on the instance.

    Raises
    ------
    ValueError : If ``dataset_name`` is not one of the supported names.
    """
    super().__init__()

    if dataset_name == 'reddit':
        graph, num_classes = load_reddit()
    elif dataset_name == 'ogbn-products':
        graph, num_classes = load_ogb('ogbn-products')
    else:
        raise ValueError('unknown dataset')

    is_train = graph.ndata['train_mask']
    is_val = graph.ndata['val_mask']
    train_ids = th.nonzero(is_train, as_tuple=True)[0]
    val_ids = th.nonzero(is_val, as_tuple=True)[0]
    # Every node that is neither a training nor a validation node is
    # treated as a test node.
    test_ids = th.nonzero(~(is_train | is_val), as_tuple=True)[0]

    neighbor_sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(count) for count in fan_out])

    if data_cpu:
        loader_device = th.device('cpu')
    else:
        train_ids, val_ids, test_ids = (
            ids.to(device) for ids in (train_ids, val_ids, test_ids))
        graph = graph.formats(['csc']).to(device)
        loader_device = device

    self.g = graph
    self.train_nid = train_ids
    self.val_nid = val_ids
    self.test_nid = test_ids
    self.sampler = neighbor_sampler
    self.device = loader_device
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.in_feats = graph.ndata['features'].shape[1]
    self.n_classes = num_classes
def main(args):
    """Load the dataset and launch training on CPU, one GPU, or many GPUs.

    Parameters
    ----------
    args : Parsed command-line arguments. This function reads ``gpu`` (a
        comma-separated list of device indices, ``-1`` meaning CPU),
        ``dataset``, ``graph_device`` and ``data_device``.

    Raises
    ------
    Exception : If ``args.dataset`` is not 'reddit' or 'ogbn-products'.
    AssertionError : If CPU-only execution is requested while
        ``graph_device`` or ``data_device`` is not 'cpu'.
    """
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    # load dataset
    if args.dataset == 'reddit':
        g, n_classes = load_reddit(self_loop=False)
    elif args.dataset == 'ogbn-products':
        g, n_classes = load_ogb('ogbn-products')
    else:
        raise Exception('unknown dataset')

    train_nid = g.ndata.pop('train_mask').nonzero().squeeze()
    val_nid = g.ndata.pop('val_mask').nonzero().squeeze()
    test_nid = g.ndata.pop('test_mask').nonzero().squeeze()

    nfeat = g.ndata.pop('features')
    labels = g.ndata.pop('labels')

    # Create csr/coo/csc formats before launching training processes with multi-gpu.
    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
    g.create_formats_()

    # This is to avoid competition overhead on machines with many cores.
    # Change it to a proper number on your machine, especially for multi-GPU training.
    # The integer divisions can reach 0 on small machines or with many GPUs,
    # which is not a valid thread count, so clamp to at least one thread.
    os.environ['OMP_NUM_THREADS'] = str(max(1, mp.cpu_count() // 2 // n_gpus))

    if n_gpus > 1:
        # Copy the graph to shared memory explicitly before pinning.
        # In other cases, we can just rely on fork's copy-on-write.
        # TODO: the original graph g is not freed.
        if args.graph_device == 'uva':
            g = g.shared_memory('g')
        if args.data_device == 'uva':
            nfeat = nfeat.share_memory_()
            labels = labels.share_memory_()

    # Pack data
    data = train_nid, val_nid, test_nid, n_classes, g, nfeat, labels

    if devices[0] == -1:
        assert args.graph_device == 'cpu', \
            f"Must have GPUs to enable {args.graph_device} sampling."
        assert args.data_device == 'cpu', \
            f"Must have GPUs to enable {args.data_device} feature storage."
        run(0, 0, args, ['cpu'], data)
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, data)
    else:
        # One worker process per GPU; wait for all of them to finish.
        procs = []
        for proc_id in range(n_gpus):
            p = mp.Process(target=run, args=(proc_id, n_gpus, args, devices, data))
            p.start()
            procs.append(p)
        for p in procs:
            p.join()
def main(args):
    """Load the dataset and launch training on CPU, one GPU, or many GPUs.

    Parameters
    ----------
    args : Parsed command-line arguments. This function reads ``gpu`` (a
        comma-separated list of device indices, ``-1`` meaning CPU),
        ``dataset``, ``graph_device`` and ``data_device``.

    Raises
    ------
    Exception : If ``args.dataset`` is not 'reddit' or 'ogbn-products'.
    AssertionError : If CPU-only execution is requested while
        ``graph_device`` or ``data_device`` is not 'cpu'.
    """
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    # load dataset
    if args.dataset == 'reddit':
        g, n_classes = load_reddit(self_loop=False)
    elif args.dataset == 'ogbn-products':
        g, n_classes = load_ogb('ogbn-products')
    else:
        raise Exception('unknown dataset')

    train_nid = g.ndata.pop('train_mask').nonzero().squeeze()
    val_nid = g.ndata.pop('val_mask').nonzero().squeeze()
    test_nid = g.ndata.pop('test_mask').nonzero().squeeze()

    nfeat = g.ndata.pop('features')
    labels = g.ndata.pop('labels')

    # Create csr/coo/csc formats before launching training processes with multi-gpu.
    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
    g.create_formats_()

    # This is to avoid competition overhead on machines with many cores.
    # Change it to a proper number on your machine, especially for multi-GPU training.
    # The integer divisions can reach 0 on small machines or with many GPUs,
    # which is not a valid thread count, so clamp to at least one thread.
    os.environ['OMP_NUM_THREADS'] = str(max(1, mp.cpu_count() // 2 // n_gpus))

    # Pack data
    data = train_nid, val_nid, test_nid, n_classes, g, nfeat, labels

    if devices[0] == -1:
        assert args.graph_device == 'cpu', \
            f"Must have GPUs to enable {args.graph_device} sampling."
        assert args.data_device == 'cpu', \
            f"Must have GPUs to enable {args.data_device} feature storage."
        run(0, 0, args, ['cpu'], data)
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, data)
    else:
        # Launch one `run` process per GPU.
        mp.spawn(run, args=(n_gpus, args, devices, data), nprocs=n_gpus)
help="By default the script puts all node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. " "Setting this flag makes all node features to be located" "in the unified tensor instead.") args = argparser.parse_args() if args.gpu >= 0: device = th.device('cuda:%d' % args.gpu) else: device = th.device('cpu') if args.dataset == 'reddit': g, n_classes = load_reddit() elif args.dataset == 'ogbn-products': g, n_classes = load_ogb('ogbn-products') else: raise Exception('unknown dataset') if args.inductive: train_g, val_g, test_g = inductive_split(g) train_nfeat = train_g.ndata.pop('features') val_nfeat = val_g.ndata.pop('features') test_nfeat = test_g.ndata.pop('features') train_labels = train_g.ndata.pop('labels') val_labels = val_g.ndata.pop('labels') test_labels = test_g.ndata.pop('labels') else: train_g = val_g = test_g = g train_nfeat = val_nfeat = test_nfeat = g.ndata.pop('features') train_labels = val_labels = test_labels = g.ndata.pop('labels')
default=4, help="Number of sampling processes. Use 0 for no extra process.") argparser.add_argument('--inductive', action='store_true', help="Inductive learning setting") args = argparser.parse_args() if args.gpu >= 0: device = th.device('cuda:%d' % args.gpu) else: device = th.device('cpu') if args.dataset == 'reddit': g, n_classes = load_reddit() elif args.dataset == 'ogbn-products': g, n_classes = load_ogb('ogbn-products') elif args.dataset == 'ogbn-arxiv': g, n_classes = load_ogb('ogbn-arxiv') #print(n_classes) #from IPython import embed; embed() elif args.dataset == 'ogbn-proteins': g, n_classes = load_ogb('ogbn-proteins', device) elif args.dataset == 'ogbn-mag': g, n_classes = load_ogb('ogbn-mag') else: raise Exception('unknown dataset') in_feats = g.ndata['features'].shape[1] if args.inductive: train_g, val_g, test_g = inductive_split(g)
'--num_trainers_per_machine', type=int, default=1, help='the number of trainers per machine. The trainer ids are stored\ in the node feature \'trainer_id\'') argparser.add_argument('--output', type=str, default='data', help='Output path of partitioned graph.') args = argparser.parse_args() start = time.time() if args.dataset == 'reddit': g, _ = load_reddit() elif args.dataset == 'ogb-product': g, _ = load_ogb('ogbn-products') elif args.dataset == 'ogb-paper100M': g, _ = load_ogb('ogbn-papers100M') print('load {} takes {:.3f} seconds'.format(args.dataset, time.time() - start)) print('|V|={}, |E|={}'.format(g.number_of_nodes(), g.number_of_edges())) print('train: {}, valid: {}, test: {}'.format( th.sum(g.ndata['train_mask']), th.sum(g.ndata['val_mask']), th.sum(g.ndata['test_mask']))) if args.balance_train: balance_ntypes = g.ndata['train_mask'] else: balance_ntypes = None if args.undirected: sym_g = dgl.to_bidirected(g, readonly=True)
help="By default the script puts all node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. " "This flag disables that.") args = argparser.parse_args() if args.gpu >= 0: device = th.device('cuda:%d' % args.gpu) else: device = th.device('cpu') # get_memory("-----------------------------------------before load_ogb***************************") t2 = ttt(tt, "before load_ogb") if args.dataset=='reddit': g, n_classes = load_reddit() if args.dataset=='ogbn-products': g, n_classes = load_ogb(args.dataset) print('#nodes:', g.number_of_nodes()) print('#edges:', g.number_of_edges()) print('#classes:', n_classes) # get_memory("-----------------------------------------after load_ogb***************************") # if args.dataset in ['arxiv', 'collab', 'citation', 'ddi', 'protein', 'ppa', 'reddit.dgl','products']: # g, n_classes = load_data(args.dataset) else: raise Exception('unknown dataset') # see_memory_usage("-----------------------------------------after data to cpu------------------------") t3 = ttt(t2, "after load_ogb") if args.inductive: train_g, val_g, test_g = inductive_split(g) train_nfeat = train_g.ndata.pop('features') val_nfeat = val_g.ndata.pop('features')
"--balance_train", action="store_true", help="balance the training size in each partition.", ) argparser.add_argument( "--balance_edges", action="store_true", help="balance the number of edges in each partition.", ) args = argparser.parse_args() start = time.time() if args.dataset == "reddit": g, _ = load_reddit() elif args.dataset == "ogb-product": g, _ = load_ogb("ogbn-products") elif args.dataset == "ogb-paper100M": g, _ = load_ogb("ogbn-papers100M") print( "load {} takes {:.3f} seconds".format( args.dataset, time.time() - start ) ) print("|V|={}, |E|={}".format(g.number_of_nodes(), g.number_of_edges())) print( "train: {}, valid: {}, test: {}".format( th.sum(g.ndata["train_mask"]), th.sum(g.ndata["val_mask"]), th.sum(g.ndata["test_mask"]), ) )
def main_libra2dgl(resultdir, dataset, nc):
    """
    Converts the output from Libra partitioning to DGL/DistGNN graph input.
    It builds dictionaries to assign local IDs to nodes in the partitions, and
    it builds a database to keep track of the location of clone nodes in the
    remote partitions.

    Parameters
    ----------
    resultdir : Location where partitions in dgl format are stored
    dataset : Dataset name
    nc : Number of partitions

    Returns
    -------
    (gg, node_map) : ``gg`` is the list that held the per-partition graphs;
        each graph is removed from it once written, so the list is empty on
        return. ``node_map`` is the int32 tensor of length ``nc`` that was
        passed to the Libra kernels.

    Output
    ------
    Creates partX folder in resultdir location for each partition X, and a
    ``<dataset>.json`` metadata file in resultdir.

    Notes
    -----
    This output is directly used as input to DistGNN
    """
    tedges = 1615685872   ## total edges
    max_c = 1024   ## max partitions supported
    factor = 1.2    ## for pre-allocated tensor size
    hash_edges = [int((tedges / i) * factor) for i in range(1, max_c + 1)]

    ## load graph for the feature gather
    args = Args(dataset)

    print("Loading data...", flush=True)
    if args.dataset == 'ogbn-products':
        print("Loading ogbn-products")
        g_orig, _ = load_ogb('ogbn-products')
    elif args.dataset == 'ogbn-papers100M':
        print("Loading ogbn-papers100M")
        g_orig, _ = load_ogb('ogbn-papers100M')
    elif args.dataset == 'proteins':
        print("Loading proteins")
        g_orig = load_proteins('proteins')
    elif args.dataset == 'ogbn-arxiv':
        print("Loading ogbn-arxiv")
        g_orig, _ = load_ogb('ogbn-arxiv')
    else:
        g_orig = load_data(args)[0]

    print("Done loading data.", flush=True)

    N_n = g_orig.number_of_nodes()
    print("Number of nodes in the graph: ", N_n)
    node_map = th.zeros(nc, dtype=th.int32)
    indices = th.zeros(N_n, dtype=th.int32)
    lftensor = th.zeros(N_n, dtype=th.int32)
    gdt_key = th.zeros(N_n, dtype=th.int32)
    gdt_value = th.zeros([N_n, nc], dtype=th.int32)
    offset = th.zeros(1, dtype=th.int32)
    ldt_ar = []

    gg = [DGLGraph() for _ in range(nc)]
    part_nodes = []

    # Size of the pre-allocated per-partition buffers; identical for every
    # partition, so it is computed once.
    # NOTE(review): hash_edges[k] holds the size for k + 1 partitions, so
    # index `nc` selects the entry for nc + 1 partitions -- confirm intended.
    fsize = hash_edges[nc]

    ## Iterator over number of partitions
    for i in range(nc):
        g = gg[i]

        hash_nodes = th.zeros(2, dtype=th.int32)
        a = th.zeros(fsize, dtype=th.int64)
        b = th.zeros(fsize, dtype=th.int64)
        ldt_key = th.zeros(fsize, dtype=th.int64)
        ldt_ar.append(ldt_key)

        ## building node, partition dictionary
        ## Assign local node ids and mapping to global node ids
        libra2dgl_build_dict(a, b, indices, ldt_key, gdt_key, gdt_value,
                             node_map, offset, nc, i, fsize, hash_nodes,
                             resultdir)

        # hash_nodes is filled by the call above: [0] is read as the node
        # count and [1] as the edge count of this partition.
        num_nodes = int(hash_nodes[0])
        num_edges = int(hash_nodes[1])
        part_nodes.append(num_nodes)

        g.add_edges(a[0:num_edges], b[0:num_edges])

    ########################################################
    ## fixing lf - 1-level tree for the split-nodes
    libra2dgl_set_lf(gdt_key, gdt_value, lftensor, nc, N_n)
    ########################################################
    graph_name = dataset
    out_path = resultdir

    part_metadata = {
        'graph_name': graph_name,
        'num_nodes': g_orig.number_of_nodes(),
        'num_edges': g_orig.number_of_edges(),
        'part_method': 'Libra',
        'num_parts': nc,   ## number of partitions/communities
        'halo_hops': 0,
        'node_map': node_map.tolist(),
        'edge_map': 0
    }
    ############################################################

    # The source tensors are the same for every partition, so look them up
    # once. Feature/label names differ between dataset loaders; fall back
    # only when the first name is actually missing.
    try:
        feat = g_orig.ndata['feat']
    except KeyError:
        feat = g_orig.ndata['features']

    try:
        labels = g_orig.ndata['label']
    except KeyError:
        labels = g_orig.ndata['labels']

    trainm = g_orig.ndata['train_mask']
    testm = g_orig.ndata['test_mask']
    valm = g_orig.ndata['val_mask']
    feat_size = feat.shape[1]

    for i in range(nc):
        # Always take the head of the lists: the entry for the previous
        # partition is deleted at the end of each iteration to free memory.
        g = gg[0]
        ldt = ldt_ar[0]
        num_nodes = part_nodes[i]

        adj = th.zeros([num_nodes, nc - 1], dtype=th.int32)
        inner_node = th.zeros(num_nodes, dtype=th.int32)
        lf = th.zeros(num_nodes, dtype=th.int32)

        gfeat = th.zeros([num_nodes, feat_size], dtype=feat.dtype)
        glabels = th.zeros(num_nodes, dtype=labels.dtype)
        gtrainm = th.zeros(num_nodes, dtype=trainm.dtype)
        gtestm = th.zeros(num_nodes, dtype=testm.dtype)
        gvalm = th.zeros(num_nodes, dtype=valm.dtype)

        ## build remote node database per local node
        ## gather feats, train, test, val, and labels for each partition
        libra2dgl_build_adjlist(feat, gfeat, adj, inner_node, ldt, gdt_key,
                                gdt_value, node_map, lf, lftensor, num_nodes,
                                nc, i, feat_size, labels, trainm, testm, valm,
                                glabels, gtrainm, gtestm, gvalm, feat.shape[0])

        g.ndata['adj'] = adj   ## database of remote clones
        g.ndata['inner_node'] = inner_node   ## split node '0' else '1'
        g.ndata['feat'] = gfeat   ## gathered features
        g.ndata['lf'] = lf   ## 1-level tree among split nodes
        g.ndata['label'] = glabels
        g.ndata['train_mask'] = gtrainm
        g.ndata['test_mask'] = gtestm
        g.ndata['val_mask'] = gvalm

        print("Writing partition {} to file".format(i), flush=True)

        part_dir = os.path.join(out_path, "part" + str(i))
        node_feat_file = os.path.join(part_dir, "node_feat.dgl")
        edge_feat_file = os.path.join(part_dir, "edge_feat.dgl")
        part_graph_file = os.path.join(part_dir, "graph.dgl")
        part_metadata['part-{}'.format(i)] = {
            'node_feats': node_feat_file,
            'edge_feats': edge_feat_file,
            'part_graph': part_graph_file
        }
        os.makedirs(part_dir, mode=0o775, exist_ok=True)
        save_tensors(node_feat_file, g.ndata)
        save_graphs(part_graph_file, [g])

        # Drop every reference to this partition before building the next.
        del g
        del gg[0]
        del ldt
        del ldt_ar[0]

    with open('{}/{}.json'.format(out_path, graph_name), 'w') as outfile:
        json.dump(part_metadata, outfile, sort_keys=True, indent=4)

    return gg, node_map
def main(args):
    """Train GraphSAGE on the full graph and report validation/test accuracy.

    Parameters
    ----------
    args : Parsed command-line arguments. This function reads ``dataset``,
        ``gpu`` (negative means CPU), ``n_hidden``, ``n_layers``,
        ``dropout``, ``aggregator_type``, ``lr``, ``weight_decay`` and
        ``n_epochs``.
    """
    # load and preprocess dataset
    if args.dataset == 'ogbn-products':
        print("Loading ogbn-products")
        g, _ = load_ogb('ogbn-products')
    elif args.dataset == 'ogbn-papers100M':
        print("Loading ogbn-papers100M")
        g, _ = load_ogb('ogbn-papers100M')
    else:
        data = load_data(args)
        g = data[0]

    features = g.ndata['feat']
    # Label key differs between loaders; fall back only on a missing key.
    try:
        labels = g.ndata['label']
    except KeyError:
        labels = g.ndata['labels']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    # Number of classes = number of distinct non-NaN label values.
    n_classes = len(torch.unique(labels[torch.logical_not(torch.isnan(labels))]))
    n_edges = g.number_of_edges()
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    if cuda:
        g = g.int().to(args.gpu)

    # create GraphSAGE model
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.aggregator_type)

    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Per-epoch training durations; the first three epochs are treated as
    # warm-up and not recorded.
    dur = []
    for epoch in range(args.n_epochs):
        tic = time.time()

        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
        toc = time.time()

        acc = evaluate(model, g, features, labels, val_nid)
        # No duration has been recorded during warm-up; report NaN directly
        # instead of taking the mean of an empty list, which warns.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                            acc, n_edges / mean_dur / 1000))
        print("Epoch: {:}, time: {:0.4f} sec".format(epoch, toc - tic))

    print()
    acc = evaluate(model, g, features, labels, test_nid)
    print("Test Accuracy {:.4f}".format(acc))
help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument( "--inductive", action="store_true", help="Inductive learning setting" ) args = argparser.parse_args() if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") if args.dataset == "reddit": g, n_classes = load_reddit() elif args.dataset == "ogb-product": g, n_classes = load_ogb("ogbn-products") else: raise Exception("unknown dataset") in_feats = g.ndata["features"].shape[1] g = dgl.as_heterograph(g) if args.inductive: train_g, val_g, test_g = inductive_split(g) else: train_g = val_g = test_g = g prepare_mp(train_g) prepare_mp(val_g) prepare_mp(test_g)
if __name__ == "__main__":
    # Command-line entry point: parse options, derive the output directory
    # and load the graph that is to be partitioned.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--dataset', type=str, default='cora')
    argparser.add_argument('--num-parts', type=int, default=2)
    argparser.add_argument('--out-dir', type=str, default='./')
    args = argparser.parse_args()

    dataset = args.dataset
    num_community = args.num_parts
    out_dir = 'Libra_result_' + dataset   ## "Libra_result_" prefix is mandatory
    resultdir = os.path.join(args.out_dir, out_dir)

    print("Input dataset for partitioning: ", dataset)
    if args.dataset == 'ogbn-products':
        print("Loading ogbn-products")
        G, _ = load_ogb('ogbn-products')
    elif args.dataset == 'ogbn-papers100M':
        print("Loading ogbn-papers100M")
        G, _ = load_ogb('ogbn-papers100M')
    elif args.dataset == 'proteins':
        G = load_proteins('proteins')
    elif args.dataset == 'ogbn-arxiv':
        print("Loading ogbn-arxiv")
        G, _ = load_ogb('ogbn-arxiv')
    else:
        # Catch ordinary errors only (not KeyboardInterrupt/SystemExit) and
        # keep the underlying cause attached to the reported error.
        try:
            G = load_data(args)[0]
        except Exception as err:
            raise DGLError("Error: Dataset {} not found !!!".format(dataset)) from err

    print("Done loading the graph.", flush=True)
def main(args):
    """Train GraphSAGE on the full ogbn-arxiv graph and report accuracy.

    Parameters
    ----------
    args : Parsed command-line arguments. This function reads ``gpu``
        (negative means CPU), ``n_hidden``, ``n_layers``, ``dropout``,
        ``aggregator_type``, ``lr``, ``weight_decay`` and ``n_epochs``.
    """
    # load and preprocess dataset
    from load_graph import load_ogb
    g, n_classes = load_ogb('ogbn-arxiv')
    features = g.ndata['feat']
    labels = g.ndata['labels']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_edges = g.num_edges()
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    # graph preprocess: drop self loops, then recount the edges used for
    # the throughput figure below
    g = dgl.remove_self_loop(g)
    n_edges = g.number_of_edges()
    if cuda:
        g = g.int().to(args.gpu)

    # create GraphSAGE model
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.aggregator_type)

    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Per-epoch training durations; the first three epochs are treated as
    # warm-up and not recorded.
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_nid)
        # No duration has been recorded during warm-up; report NaN directly
        # instead of taking the mean of an empty list, which warns.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                            acc, n_edges / mean_dur / 1000))

    print()
    acc = evaluate(model, g, features, labels, test_nid)
    print("Test Accuracy {:.4f}".format(acc))
def run(g, data):
    """Train GraphSAGE on one graph partition with gradient all-reduce.

    Each epoch runs a forward/backward pass on the local partition ``g``,
    sums the gradients across processes with ``th.distributed.all_reduce``
    and steps the optimizer. Accuracy is measured on the full dataset
    loaded through ``load_ogb(args.dataset)``.

    Parameters
    ----------
    g : Partition graph carrying 'feat', 'label', 'train_mask', 'val_mask'
        and 'test_mask' node data.
    data : Tuple ``(n_classes, node_map, num_parts, rank, world_size)``.

    Notes
    -----
    Reads the module-level ``args`` (``gpu``, ``dataset``, ``n_epochs``,
    ``val``, ``rank`` and the model/optimizer hyper-parameters).
    """
    n_classes, node_map, num_parts, rank, world_size = data

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_edges = g.number_of_edges()
    print("""----Data statistics------' #Nodes %d #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (g.number_of_nodes(), n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        th.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        print("use cuda:", args.gpu)

    train_nid = train_mask.nonzero().squeeze()

    if cuda:
        g = g.int().to(args.gpu)

    # create GraphSAGE model
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.aggregator_type)

    if cuda:
        model.cuda()

    # use optimizer
    optimizer = th.optim.Adam(model.parameters(), lr=args.lr,
                              weight_decay=args.weight_decay)

    # Full (unpartitioned) graph used for validation and the final test.
    # It is loaded once and reused for both.
    gr, _ = load_ogb(args.dataset)
    gr_features = gr.ndata['feat']
    gr_labels = gr.ndata['labels']
    gr_test_nid = gr.ndata['test_mask'].nonzero().squeeze()
    # Validation must use the validation split, not the test split.
    gr_val_nid = gr.ndata['val_mask'].nonzero().squeeze()

    for epoch in range(args.n_epochs):
        tic = time.time()
        model.train()

        # forward
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()

        # Sum gradients over all processes before the optimizer step.
        for param in model.parameters():
            if param.requires_grad and param.grad is not None:
                th.distributed.all_reduce(param.grad.data,
                                          op=th.distributed.ReduceOp.SUM)

        optimizer.step()

        if args.val:
            acc, _, _ = evaluate(model, gr, gr_features, gr_labels, gr_val_nid)
            cum_acc1 = th.tensor(acc, dtype=th.float32)
            th.distributed.all_reduce(cum_acc1, op=th.distributed.ReduceOp.SUM)
            if rank == 0:
                print(
                    "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f}"
                    .format(epoch, time.time() - tic, loss.item(),
                            float(cum_acc1) / num_parts),
                    flush=True)
        toc = time.time()
        # NOTE(review): this check uses args.rank while the validation print
        # uses the rank unpacked from `data` -- confirm they always agree.
        if args.rank == 0:
            print("Epoch: {} time: {:0.4} sec".format(epoch, toc - tic), flush=True)

    print()
    acc, _, _ = evaluate(model, gr, gr_features, gr_labels, gr_test_nid)
    # Sum the per-process accuracies; the average over partitions is printed.
    cum_acc1 = th.tensor(acc, dtype=th.float32)
    th.distributed.all_reduce(cum_acc1, op=th.distributed.ReduceOp.SUM)
    if args.rank == 0:
        print("#############################################################", flush=True)
        print("Single node accuracy: Avg: {:0.4f}%".format(
            float(cum_acc1) / num_parts * 100), flush=True)
        print("#############################################################", flush=True)
default='gpu', help="By default the script puts all node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. " "Use 'cpu' to keep the features on host memory and " "'uva' to enable UnifiedTensor (GPU zero-copy access on " "pinned host memory).") args = argparser.parse_args() devices = list(map(int, args.gpu.split(','))) n_gpus = len(devices) if args.dataset == 'reddit': g, n_classes = load_reddit() elif args.dataset == 'ogbn-products': g, n_classes = load_ogb('ogbn-products') elif args.dataset == 'ogbn-papers100M': g, n_classes = load_ogb('ogbn-papers100M') g = dgl.add_reverse_edges(g) # convert labels to integer g.ndata['labels'] = th.as_tensor(g.ndata['labels'], dtype=th.int64) g.ndata.pop('year') else: raise Exception('unknown dataset') if args.inductive: train_g, val_g, test_g = inductive_split(g) train_nfeat = train_g.ndata.pop('features') val_nfeat = val_g.ndata.pop('features') test_nfeat = test_g.ndata.pop('features') train_labels = train_g.ndata.pop('labels')
def vertex_cut_partition(num_community, dataset, prefix):
    """
    Performs vertex-cut based graph partitioning

    Parameters
    ----------
    num_community : Number of partitions to create
    dataset : Input graph name to partition
    prefix : Output location

    Returns
    -------
    The maximum entry of the per-partition weight tensor filled in by
    ``libra_vertex_cut``, converted to ``int``.

    Raises
    ------
    DGLError : If the dataset is not one of the explicitly handled names and
        ``load_data`` fails for it.

    Output
    ------
    Creates X partition folder as XCommunities (say, X=2, so, 2Communities)
    XCommunities contains communityZ.txt file per partition Z
    Each such file contains list of edges assigned to that partition.
    """
    args = Args(dataset)
    print("Input dataset: ", args.dataset)
    if args.dataset == 'ogbn-products':
        print("Loading ogbn-products")
        G, _ = load_ogb('ogbn-products')
    elif args.dataset == 'ogbn-papers100M':
        print("Loading ogbn-papers100M")
        G, _ = load_ogb('ogbn-papers100M')
    elif args.dataset == 'proteins':
        G = load_proteins('proteins')
    elif args.dataset == 'ogbn-arxiv':
        print("Loading ogbn-arxiv")
        G, _ = load_ogb('ogbn-arxiv')
    else:
        # Catch ordinary errors only (not KeyboardInterrupt/SystemExit) and
        # keep the underlying cause attached to the reported error.
        try:
            G = load_data(args)[0]
        except Exception as err:
            raise DGLError("Error: Dataset {} not found !!!".format(dataset)) from err

    print("Done loading the graph.", flush=True)

    N_n = G.number_of_nodes()   # number of nodes
    N_c = num_community    ## number of partitions/communities
    N_e = G.number_of_edges()

    node_degree = G.in_degrees() + G.out_degrees()
    edgenum_unassigned = node_degree.clone()

    u_t, v_t = G.edges()
    weight_ = th.ones(u_t.shape[0], dtype=th.float32)
    community_weights = th.zeros(N_c, dtype=th.float32)

    # Count self loops with one vectorised comparison instead of a Python
    # loop over every edge.
    self_loop = int((u_t == v_t).sum())
    print("#self loops in the dataset: ", self_loop)

    # The graph object is no longer needed; only the extracted tensors are
    # passed on.
    del G

    ## call to C/C++ code
    out = th.zeros(u_t.shape[0], dtype=th.int32)
    libra_vertex_cut(N_c, node_degree, edgenum_unassigned, community_weights,
                     u_t, v_t, weight_, out, N_n, N_e, prefix)

    return int(community_weights.max())