def get_data(args, device):
    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']
    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes
    n_edges = g.number_of_edges()

    g = g.long()

    # create the cluster gcn iterator
    cluster_iterator = ClusterIter(args.dataset, g, args.psize, args.batch_size,
                                   train_nid, use_pp=args.use_pp)

    # set device for dataset tensors
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)
    g = g.int().to(device)

    return (g, cluster_iterator, train_mask, val_mask, test_mask, labels,
            train_nid, in_feats, n_classes, n_edges)
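# Illustrative driver (not part of the original script): a minimal sketch of how
# the tuple returned by get_data() might be unpacked and its cluster iterator
# consumed; the device handling mirrors the function above, everything else is
# an assumption for illustration.
device = torch.device(f'cuda:{args.gpu}' if args.gpu >= 0 else 'cpu')
(g, cluster_iterator, train_mask, val_mask, test_mask,
 labels, train_nid, in_feats, n_classes, n_edges) = get_data(args, device)
for cluster in cluster_iterator:
    cluster = cluster.to(device)  # each item is a node-induced partition subgraph
    # ... forward/backward on `cluster` as in the training loops below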
graph.ndata['labels'] = labels

mask = th.zeros(num_nodes, dtype=th.bool)
mask[train_idx] = True
graph.ndata['train_mask'] = mask
mask = th.zeros(num_nodes, dtype=th.bool)
mask[val_idx] = True
graph.ndata['valid_mask'] = mask
mask = th.zeros(num_nodes, dtype=th.bool)
mask[test_idx] = True
graph.ndata['test_mask'] = mask

graph.in_degrees(0)
graph.out_degrees(0)
graph.find_edges(0)

cluster_iter_data = ClusterIter('ogbn-products', graph, args.num_partitions,
                                args.batch_size)
cluster_iterator = DataLoader(cluster_iter_data, batch_size=args.batch_size,
                              shuffle=True, pin_memory=True, num_workers=0,
                              collate_fn=partial(subgraph_collate_fn, graph))

in_feats = graph.ndata['feat'].shape[1]
n_classes = (labels.max() + 1).item()

# Pack data
data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator

# Run 10 times
test_accs = []
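# Hypothetical sketch of the `subgraph_collate_fn` assumed by the DataLoader above:
# ClusterIter yields per-partition node-id tensors, and the collate step induces
# the corresponding node subgraph (carrying its node features/labels) on the
# parent graph. The real helper may differ in detail.
def subgraph_collate_fn(g, batch):
    nids = th.cat(batch)      # merge the sampled partitions into one id tensor
    return g.subgraph(nids)   # node-induced subgraph used as the training batch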
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi', 'amazon', 'amazon-0.1', 'amazon-0.3',
                          'amazon2M', 'amazon2M-47'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)
    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features
    features = torch.FloatTensor(features)

    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask).type(torch.bool)
    val_mask = torch.ByteTensor(data.val_mask).type(torch.bool)
    test_mask = torch.ByteTensor(data.test_mask).type(torch.bool)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model_sel = {'GCN': GCNCluster, 'graphsage': GraphSAGE}
    model_class = model_sel[args.model_type]
    print('using model:', model_class)
    model = model_class(in_feats,
                        args.n_hidden,
                        n_classes,
                        args.n_layers,
                        F.relu,
                        args.dropout,
                        args.use_pp)
    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print("current memory after model before training",
              torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}: training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(
                model, g, labels, val_mask, multitask)
            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(
        model, g, labels, test_mask, multitask)
    print("Test F1-mic {:.4f}, Test F1-mac {:.4f}".format(
        test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
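# Hypothetical sketch of the full-graph `evaluate` helper used above; the real
# implementation lives elsewhere in the repo, this only mirrors its call sites
# (model, g, labels, mask, multitask) -> (f1_micro, f1_macro) and assumes a
# `calc_f1` helper like the one sketched further below.
def evaluate(model, g, labels, mask, multitask=False):
    model.eval()
    with torch.no_grad():
        logits = model(g)  # full-graph forward pass
        f1_mic, f1_mac = calc_f1(labels[mask].cpu().numpy(),
                                 logits[mask].cpu().numpy(),
                                 multitask)
    return f1_mic, f1_mac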
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)

    # Normalize features
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
        features = torch.FloatTensor(features)

    in_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)
    # g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)
    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print("current memory after model before training",
              torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}: training loss",
                      loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                    # batch_val_mask = cluster.ndata['val_mask']
                    val_f1_mic, val_f1_mac = calc_f1(batch_labels.cpu().numpy(),
                                                     logits.cpu().numpy(),
                                                     multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)
            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)
            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
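# Hypothetical sketch of `calc_f1` consistent with how it is called above
# (y_true, y_pred, multitask); the actual helper may differ. Assumes
# scikit-learn's f1_score and numpy arrays of raw logits.
from sklearn.metrics import f1_score

def calc_f1(y_true, y_pred, multitask):
    if multitask:
        # multi-label case: threshold the logits at zero
        y_pred[y_pred > 0] = 1
        y_pred[y_pred <= 0] = 0
    else:
        # multi-class case: take the argmax over class scores
        y_pred = np.argmax(y_pred, axis=1)
    return (f1_score(y_true, y_pred, average='micro'),
            f1_score(y_true, y_pred, average='macro'))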
mask = th.zeros(num_nodes, dtype=th.bool)
mask[train_idx] = True
graph.ndata['train_mask'] = mask
mask = th.zeros(num_nodes, dtype=th.bool)
mask[val_idx] = True
graph.ndata['valid_mask'] = mask
mask = th.zeros(num_nodes, dtype=th.bool)
mask[test_idx] = True
graph.ndata['test_mask'] = mask

graph.in_degree(0)
graph.out_degree(0)
graph.find_edges(0)

cluster_iter_data = ClusterIter(
    'ogbn-products', graph, args.num_partitions, args.batch_size,
    th.cat([train_idx, val_idx, test_idx]))
idx = th.arange(args.num_partitions // args.batch_size)
cluster_iterator = DataLoader(cluster_iter_data, batch_size=32, shuffle=True,
                              pin_memory=True, num_workers=4,
                              collate_fn=partial(subgraph_collate_fn, graph))

in_feats = graph.ndata['feat'].shape[1]
print(in_feats)
n_classes = (labels.max() + 1).item()

# Pack data
data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']
    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    if args.self_loop and not args.dataset.startswith('reddit'):
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        print("adding self-loop edges")

    # metis only support int64 graph
    g = g.long()

    cluster_iterator = ClusterIter(args.dataset, g, args.psize,
                                   args.batch_size, train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        g = g.int().to(args.gpu)

    print('labels shape:', g.ndata['label'].shape)
    print("features shape, ", g.ndata['feat'].shape)

    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)
    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print("current memory after model before training",
              torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            if cuda:
                cluster = cluster.to(torch.cuda.current_device())
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['label']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}: training loss", loss.item())
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(model, g, labels, val_mask,
                                              multitask)
            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))

    end_time = time.time()
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(
            torch.load(os.path.join(log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask,
                                        multitask)
    print("Test F1-mic {:.4f}, Test F1-mac {:.4f}".format(
        test_f1_mic, test_f1_mac))
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']
    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes

    g = g.long()

    # create GCN model
    cluster_iterator = ClusterIter(args.dataset, g, args.psize,
                                   args.batch_size, train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        g = g.int().to(args.gpu)

    print('labels shape:', g.ndata['label'].shape)
    print("features shape, ", g.ndata['feat'].shape)

    if args.model_type == 'sage':
        model = GCN(in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                    args.dropout, args.use_layernorm, False, False, 1, True)
    else:
        raise NotImplementedError(
            f'{args.model_type} is not a supported model type')
    if cuda:
        model.cuda()

    # use optimizer
    loss_f = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()

    total_time = 0.
    val_accs = []
    test_accs = []
    for epoch in range(args.n_epochs):
        print(f'Running epoch {epoch} / {args.n_epochs}', flush=True)
        start_time = time.time()
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            if cuda:
                cluster = cluster.to(torch.cuda.current_device())
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['label']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        end_time = time.time()
        elapsed_time = end_time - start_time
        total_time += elapsed_time

        # evaluate
        # do NOT include evaluation within the timing metrics
        if args.eval_cpu:
            model.to('cpu')
            if args.use_f1:
                val_acc = evaluate(model, g.cpu(), labels.cpu(),
                                   val_mask.cpu(), 'f1')
                test_acc = evaluate(model, g.cpu(), labels.cpu(),
                                    test_mask.cpu(), 'f1')
            else:
                val_acc = evaluate(model, g.cpu(), labels.cpu(), val_mask.cpu())
                test_acc = evaluate(model, g.cpu(), labels.cpu(),
                                    test_mask.cpu())
            model.cuda()
        else:
            if args.use_f1:
                val_acc = evaluate(model, g, labels, val_mask, 'f1')
                test_acc = evaluate(model, g, labels, test_mask, 'f1')
            else:
                val_acc = evaluate(model, g, labels, val_mask)
                test_acc = evaluate(model, g, labels, test_mask)
        val_accs.append(val_acc)
        test_accs.append(test_acc)
        print(f'Val acc {val_acc}', flush=True)

    print(f'Training Time: {total_time:.4f}', flush=True)
    print(f'Last Val: {val_accs[-1]:.4f}', flush=True)
    print(f'Best Val: {max(val_accs):.4f}', flush=True)
    print(f'Last Test: {test_accs[-1]:.4f}', flush=True)
    print(f'Best Test: {max(test_accs):.4f}', flush=True)

    plt.plot(val_accs)
    title = args.fig_name
    plt.title(title)
    os.makedirs(args.fig_dir, exist_ok=True)
    plt.savefig(os.path.join(args.fig_dir, title + '.png'))
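# Hypothetical sketch of the metric helper matching the call sites above
# (model, g, labels, mask[, 'f1']); the real `evaluate` may differ. Assumes the
# model runs a full-graph forward pass and that scikit-learn provides f1_score.
from sklearn.metrics import f1_score

def evaluate(model, g, labels, mask, metric='acc'):
    model.eval()
    with torch.no_grad():
        logits = model(g)[mask]                 # full-graph forward, then mask
        y_true = labels[mask].cpu().numpy()
        y_pred = logits.argmax(dim=1).cpu().numpy()
    if metric == 'f1':
        return f1_score(y_true, y_pred, average='micro')
    return float((y_pred == y_true).mean())     # plain accuracy otherwise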