def test(adj):
    """Train a fresh GCN on the given adjacency and return test accuracy.

    Relies on module-level globals (features, labels, idx_train, idx_test,
    args, device) — assumed defined elsewhere in this file.
    """
    adj = normalize_adj_tensor(adj)
    victim = GCN(nfeat=features.shape[1], nhid=args.hidden,
                 nclass=labels.max().item() + 1, dropout=0.5)
    if device != 'cpu':
        victim = victim.to(device)
    opt = optim.Adam(victim.parameters(), lr=args.lr, weight_decay=5e-4)

    victim.train()
    for _ in range(args.epochs):
        opt.zero_grad()
        preds = victim(features, adj)
        train_loss = F.nll_loss(preds[idx_train], labels[idx_train])
        _train_acc = accuracy(preds[idx_train], labels[idx_train])  # kept for parity; not used
        train_loss.backward()
        opt.step()

    victim.eval()
    preds = victim(features, adj)
    loss_test = F.nll_loss(preds[idx_test], labels[idx_test])
    acc_test = accuracy(preds[idx_test], labels[idx_test])
    # print("Test set results:",
    #       "loss= {:.4f}".format(loss_test.item()),
    #       "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test.item()
def run(args):
    """End-to-end train/test driver on a graph-learn graph.

    Builds train/test data loaders (multi-process or in-process depending on
    args.use_mp), trains a GCN for args.epoch epochs, and prints test accuracy
    per epoch. Uses module-level `device`, `train`, `test`, `query`,
    `induce_func` — assumed defined elsewhere in this file.
    """
    gl.set_tape_capacity(1)
    g = load_graph(args)
    if args.use_mp:
        # multi-process mode: serve the graph; workers attach as clients
        gl.set_tracker_mode(0)
        thg.set_client_num(args.client_num)
        thg.launch_server(g)
    else:
        g.init(task_index=args.rank, task_count=args.world_size)
    # TODO(baole): This is an estimate and an accurate value will be needed from graphlearn.
    length_per_worker = args.train_length // args.train_batch_size // args.world_size
    print('length_per_worker being set to: ' + str(length_per_worker))
    # data loader
    train_query = query(g, args, mask=gl.Mask.TRAIN)
    if args.use_mp:
        # multi-process Dataset needs the graph handle to reconnect per worker
        train_dataset = thg.Dataset(train_query, window=5, induce_func=induce_func, graph=g)
    else:
        train_dataset = thg.Dataset(train_query, window=5, induce_func=induce_func)
    train_loader = thg.PyGDataLoader(train_dataset, multi_process=args.use_mp,
                                     length=length_per_worker)
    test_query = query(g, args, mask=gl.Mask.TEST)
    if args.use_mp:
        test_dataset = thg.Dataset(test_query, window=5, induce_func=induce_func, graph=g)
    else:
        test_dataset = thg.Dataset(test_query, window=5, induce_func=induce_func)
    test_loader = thg.PyGDataLoader(test_dataset, multi_process=args.use_mp)
    # define model
    model = GCN(input_dim=args.features_num,
                hidden_dim=args.hidden_dim,
                output_dim=args.class_num,
                depth=args.depth,
                drop_rate=args.drop_rate).to(device)
    if dist.is_initialized():
        # wrap for distributed data parallelism when torch.distributed is up
        model = torch.nn.parallel.DistributedDataParallel(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    # train and test
    for epoch in range(0, args.epoch):
        train(model, train_loader, optimizer, args)
        test_acc = test(model, test_loader, args)
        log = 'Epoch: {:03d}, Test: {:.4f}'
        print(log.format(epoch, test_acc))
    if not args.use_mp:
        # NOTE(review): in multi-process mode the server presumably owns the
        # graph lifetime — confirm shutdown is handled elsewhere
        g.close()
def main():
    """Build the 34-node GCN, move it to GPU when available, and train it.

    Relies on module-level `GCN`, `my_cfg`, and `train`.
    """
    model = GCN(34, 32, 2)
    if torch.cuda.is_available():
        model = model.cuda()
    print(my_cfg)
    optimizer = torch.optim.Adam(model.parameters(), lr=my_cfg['lr'])
    # decay LR by 10x at each configured milestone
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=my_cfg['milestones'],
        gamma=0.1,
    )
    train(model, optimizer, scheduler)
def train_gcn():
    """Train a GCN on the Planetoid/Cora dataset and print loss/accuracy per epoch.

    Loads Cora with normalized features, builds the project-local GCN, and runs
    full-batch training with Adam for 1999 epochs. Accuracy is computed over
    all nodes (not a held-out split), matching the original script.
    """
    dataset = 'Cora'
    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset)
    dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    from gcn import GCN
    num_nodes = data.x.size(0)
    input_dim = data.x.size(1)
    hidden_dim = 16
    # FIX: was hard-coded to 7 (Cora-specific); derive from the dataset so the
    # function keeps working if the dataset name above is changed.
    num_classes = dataset.num_classes
    model = GCN(input_dim, hidden_dim, num_classes)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02, weight_decay=0)
    for epoch in range(1, 2000):
        optimizer.zero_grad()
        output = model.forward_(data.x, data.edge_index)
        loss = model.loss(output, data.y)
        loss.backward()
        optimizer.step()
        # whole-graph accuracy (no train/test masking in this script)
        acc = output.max(1)[1].eq(data.y).sum().item() / num_nodes
        print('epoch=%d loss=%f acc=%f' % (epoch, loss.item(), acc))
def main(args):
    """Benchmark GCN/GIN training time on a custom .npz dataset.

    Loads the dataset, applies symmetric-degree normalization, trains for
    args.n_epochs, and prints average per-epoch time in milliseconds.
    """
    path = os.path.join(args.dataDir, args.dataset + ".npz")
    data = custom_dataset(path, args.dim, args.classes, load_from_txt=False)
    g = data.g
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)
    features = data.x
    labels = data.y
    in_feats = features.size(1)
    n_classes = data.num_classes
    # normalization: D^{-1/2}
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # FIX: zero-degree nodes yield inf; zero them out (as the sibling scripts
    # in this file already do) to avoid NaNs propagating through the model.
    norm[torch.isinf(norm)] = 0
    # FIX: only move the norm to GPU when a GPU was requested; the original
    # called .cuda() unconditionally and crashed on CPU-only runs.
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    if args.model == 'gcn':
        model = GCN(g, in_feats=in_feats, n_hidden=args.hidden,
                    n_classes=n_classes, n_layers=2)
    else:
        model = GIN(g, input_dim=in_feats, hidden_dim=64,
                    output_dim=n_classes, num_layers=5)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
    # FIX: synchronize only when CUDA is in use (it raises without a GPU);
    # needed for accurate GPU timing since kernel launches are async.
    if cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in tqdm(range(args.n_epochs)):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if cuda:
        torch.cuda.synchronize()
    dur = time.perf_counter() - start
    if args.model == 'gcn':
        print("DGL GCN (L2-H16) Time: (ms) {:.3f}".format(dur * 1e3 / args.n_epochs))
    else:
        print("DGL GIN (L5-H64) Time: (ms) {:.3f}".format(dur * 1e3 / args.n_epochs))
    print()
norm = torch.pow(degs, -0.5) norm[torch.isinf(norm)] = 0 norm = norm.cuda() g.ndata['norm'] = norm.unsqueeze(1) # norm gcn = GCN(g, in_feats=n_lag, n_hidden=64, n_classes=1, n_layers=2, activation=F.relu, dropout=0.5).cuda() loss_fcn = torch.nn.MSELoss() optimizer = torch.optim.Adam(gcn.parameters(), lr=1e-3, weight_decay=5e-4) # Train n2t = lambda arr: torch.from_numpy(np.array(arr)).cuda().float() def format_batch(inds, data, squeeze=True): batch = [] labels = [] lmasks = [] for di, hi in inds: X = data[di][hi - n_lag:hi].T.copy() X[np.isnan(X)] = -1 Y = data[di][hi:hi + 1].T.copy() mask = np.logical_not(np.isnan(Y))
)) # one hot encode nodes for features (replace with doc2vec in future) print(G.nodes[2].data['feat']) # The first layer transforms input features of size of 34 to a hidden size of 5. # The second layer transforms the hidden layer and produces output features of # size 2, corresponding to the two groups of the karate club. net = GCN(G.number_of_nodes(), 30, 2) inputs = torch.eye(G.number_of_nodes()) labeled_nodes = torch.tensor( [wantedNum, unwantedNum]) # only the instructor and the president nodes are labeled labels = torch.tensor([0, 1]) # their labels are different optimizer = torch.optim.Adam(net.parameters(), lr=0.01) all_logits = [] for epoch in range(numEpochs): logits = net(G, inputs) # we save the logits for visualization later all_logits.append(logits.detach()) logp = F.log_softmax(logits, 1) # we only compute loss for labeled nodes loss = F.nll_loss(logp[labeled_nodes], labels) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch %d | Loss: %6.3e' % (epoch, loss.item()))
class BaseMeta(Module):
    """Base module for meta-gradient adjacency attacks on a GCN.

    Holds a surrogate GCN, a learnable matrix of adjacency perturbations
    (`adj_changes`), and helpers that mask out perturbations which would
    create singleton nodes or violate a degree-distribution likelihood test.
    """

    def __init__(self, nfeat, hidden_sizes, nclass, nnodes, dropout,
                 train_iters, attack_features, lambda_, device,
                 with_bias=False, lr=0.01, with_relu=False):
        """Store attack hyper-parameters and build the surrogate GCN.

        nfeat/nclass: feature and class counts; hidden_sizes[0] sizes the
        surrogate's hidden layer. nnodes sizes the NxN perturbation matrix.
        """
        super(BaseMeta, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.nfeat = nfeat
        self.nclass = nclass
        self.with_bias = with_bias
        self.with_relu = with_relu
        # linearized surrogate (no ReLU), as is common for gradient-based attacks
        self.gcn = GCN(nfeat=nfeat,
                       nhid=hidden_sizes[0],
                       nclass=nclass,
                       dropout=0.5,
                       with_relu=False)
        self.train_iters = train_iters
        self.surrogate_optimizer = optim.Adam(self.gcn.parameters(),
                                              lr=lr, weight_decay=5e-4)
        self.attack_features = attack_features
        self.lambda_ = lambda_
        self.device = device
        self.nnodes = nnodes
        # learnable symmetric-edge perturbations, initialized to zero
        self.adj_changes = Parameter(torch.FloatTensor(nnodes, nnodes))
        self.adj_changes.data.fill_(0)

    def filter_potential_singletons(self, modified_adj):
        """
        Computes a mask for entries potentially leading to singleton nodes, i.e.
        one of the two nodes corresponding to the entry have degree 1 and there
        is an edge between the two nodes.

        Returns
        -------
        torch.Tensor shape [N, N], float with ones everywhere except the entries
        of potential singleton nodes, where the returned tensor has value 0.
        """
        degrees = modified_adj.sum(0)
        degree_one = (degrees == 1)
        # broadcast the degree-one flag across rows, keep only existing edges
        resh = degree_one.repeat(modified_adj.shape[0], 1).float()
        l_and = resh * modified_adj
        # symmetrize: an entry is blocked if either endpoint has degree 1
        logical_and_symmetric = l_and + l_and.t()
        flat_mask = 1 - logical_and_symmetric
        return flat_mask

    def train_surrogate(self, features, adj, labels, idx_train, train_iters=200):
        """Train the surrogate GCN on idx_train, then self-label the rest.

        Returns predicted labels for all nodes with the true labels substituted
        on idx_train; the surrogate's weights are re-initialized before return.

        NOTE: the loop runs `train_iters` (the argument), not self.train_iters.
        """
        print(
            '=== training surrogate model to predict unlabled data for self-training'
        )
        surrogate = self.gcn
        surrogate.initialize()
        adj_norm = utils.normalize_adj_tensor(adj)
        surrogate.train()
        for i in range(train_iters):
            self.surrogate_optimizer.zero_grad()
            output = surrogate(features, adj_norm)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            loss_train.backward()
            self.surrogate_optimizer.step()
        # Predict the labels of the unlabeled nodes to use them for self-training.
        surrogate.eval()
        output = surrogate(features, adj_norm)
        labels_self_training = output.argmax(1)
        # keep ground truth where it is known
        labels_self_training[idx_train] = labels[idx_train]
        # reset parameters for later updating
        surrogate.initialize()
        return labels_self_training

    def log_likelihood_constraint(self, modified_adj, ori_adj, ll_cutoff):
        """
        Computes a mask for entries that, if the edge corresponding to the entry
        is added/removed, would lead to the log likelihood constraint to be
        violated.
        """
        # minimum degree considered by the power-law likelihood test
        t_d_min = torch.tensor(2.0).to(self.device)
        # all candidate edges: upper triangle of the NxN grid (i < j)
        t_possible_edges = np.array(
            np.triu(np.ones((self.nnodes, self.nnodes)), k=1).nonzero()).T
        allowed_mask, current_ratio = utils.likelihood_ratio_filter(
            t_possible_edges, modified_adj, ori_adj, t_d_min, ll_cutoff)
        return allowed_mask, current_ratio
def main(args):
    """Train and evaluate a DGL GCN on a citation dataset (load_data(args)).

    Moves data to GPU when args.gpu >= 0, optionally adds self-loops, applies
    symmetric degree normalization, trains for args.n_epochs, and prints
    per-epoch stats plus final test accuracy.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks are deprecated for indexing in newer
    # torch; sibling blocks in this file use BoolTensor when available.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_nodes = data.graph.number_of_nodes()
    print("""----Data statistics------'
      #Nodes %d
      #Edges %d
      #Feature %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_nodes, n_edges, in_feats, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, with isolated nodes mapped to 0 instead of inf
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # skip the first 3 epochs when timing (warm-up)
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # NOTE: timing stops right after the forward pass here, so `dur`
        # measures forward time only (unlike sibling blocks that time the
        # whole step)
        if epoch >= 3:
            dur.append(time.time() - t0)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train a DGL GCN on the Amazon2M graph loaded from a dglgraph binary.

    Masks and features come from the graph's ndata; class count is fixed at 47
    for this dataset. Prints per-epoch stats and final (validation-based)
    accuracy plus total training time.
    """
    # load and preprocess dataset
    # data = load_data(args)
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    # NOTE(review): if torch lacks BoolTensor the masks below are never bound
    # and later uses raise NameError — the commented else-branch was the
    # intended fallback; confirm minimum torch version.
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    # else:
    #     train_mask = torch.ByteTensor(data.train_mask)
    #     val_mask = torch.ByteTensor(data.val_mask)
    #     test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = 47  # Amazon2M class count (hard-coded for this dataset)
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    # g = data.graph
    # add self loop
    # if args.self_loop:
    #     g.remove_edges_from(nx.selfloop_edges(g))
    #     g.add_edges_from(zip(g.nodes(), g.nodes()))
    # g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout)
    if cuda:
        model.cuda()
    print(model)
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        # warm-up: skip first 3 epochs when timing
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print()
    # deliberately evaluated on val_mask: this dataset split has no test mask here
    acc = evaluate(model, features, labels, val_mask)  # no test_mask
    print("Test accuracy {:.2%}".format(acc))
    print(
        f'Training Time Consuming: {np.sum(dur)}, all time cost: {time.time() - start}'
    )
"_nodedr_" + str(node_dropout) + \ "_messdr_" + str(mess_dropout) + \ "_reg_" + str(reg) + \ "_lr_" + str(lr) # create GCN model model = GCN(data_generator.n_users, data_generator.n_items, emb_dim, layers, reg, node_dropout, mess_dropout, adj_mtx) model.to(device='cuda:0') model = torch.nn.DataParallel(model, device_ids=[0, 1]) # current best metric cur_best_metric = 0 # Adam optimizer # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) # Set values for early stopping cur_best_loss, stopping_step, should_stop = 1e3, 0, False today = datetime.now() print("Start at " + str(today)) print("Using " + str(device) + " for computations") print("Params on CUDA: " + str(next(model.parameters()).is_cuda)) results = { "Epoch": [], "Loss": [], "Recall": [], "NDCG": [], "Training Time": [] }
def main(args):
    """Adversarially train a GCN with FGSM-perturbed features, then sweep
    epsilon values and plot nominal/FGSM/IFGSM test accuracy.

    Uses module-level globals: input_train_mask, input_test_mask, data_adj,
    train_portion, load_data, attack/eval helpers — assumed defined elsewhere.
    """
    train_mask = input_train_mask
    test_mask = input_test_mask
    in_feats = args.n_input_features
    n_classes = args.n_classes
    g = DGLGraph(data_adj)
    # add self-loops so every node sees its own features
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model_adv = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                    F.leaky_relu, args.dropout)
    # NOTE(review): model_non_adv is built and switched to train mode but
    # never trained or evaluated in this function — possibly dead code.
    model_non_adv = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                        F.leaky_relu, args.dropout)
    loss_fcn = torch.nn.MSELoss()
    # use optimizer
    optimizer = torch.optim.Adam(model_adv.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    print(optimizer.state_dict()['param_groups'][0]['lr'])
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        # print("learning_rate", scheduler.get_lr())
        # adjust_learning_rate(optimizer=optimizer, epoch=epoch)
        # shuffle the order of training chunks each epoch
        # (assumes train_portion is an int-like count — TODO confirm)
        shufle_index = np.arange(int(train_portion))
        np.random.shuffle(shufle_index)
        for t in range(train_portion):
            features, labels = load_data(shufle_index[t])
            # features.requires_grad = True
            model_non_adv.train()
            model_adv.train()
            # if epoch >= 3:
            t0 = time.time()
            # adv_features = distr_attack(model=model, loss_fcn=loss_fcn, feature_train=features,
            #                             label_train=labels, gamma=0.000001, T_adv=20)
            # adv_features = ifgsm_attack(model=model, loss_fcn=loss_fcn, optimizer=optimizer, feature=features,
            #                             label=labels, T_adv=20, epsilon=1, lr=0.1)
            # epsilon_fgsm=0 means no perturbation is applied during training
            # here — presumably a deliberate baseline setting; confirm.
            adv_features = fgsm_attack_generateor(model=model_adv,
                                                  loss_fcn=loss_fcn,
                                                  feature=features,
                                                  label=labels,
                                                  epsilon_fgsm=0,
                                                  mask=train_mask)
            # forward
            logits = model_adv(adv_features)
            # TO DO: change this
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            optimizer.zero_grad()
            adjust_learning_rate(optimizer, epoch)
            loss.backward()
            optimizer.step()
            if epoch >= 3:
                dur.append(time.time() - t0)
            acc = evaluate(model_adv, features, labels, train_mask)
            learning_rate = optimizer.state_dict()['param_groups'][0]['lr']
            print(
                "Epoch {:05d} | learning_rate {:.4f} | Iter {:05d}| Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} |"
                "ETputs(KTEPS) {:.2f}".format(epoch, learning_rate, t,
                                              np.mean(dur), loss.item(), acc,
                                              n_edges / np.mean(dur) / 1000))
    # epsilon sweep: record accuracy under no attack, FGSM, and IFGSM
    history_acc_test = {}
    history_acc_test["nominal"] = []
    # history_acc_test["distr"] = []
    history_acc_test["fgsm"] = []
    history_acc_test["ifgsm"] = []
    # eps = [0, 0.001, 0.002, 0.003, 0.005, 0.008, 0.01, 0.02, 0.04, 0.06, 0.08]
    eps = [0, .001, .005, .009, .02, 0.05, 0.08]
    for _, i_eps in enumerate(eps):
        # history_acc_test["distr"].append(test_distr(model_adv, loss_fcn, gamma=0.001, T_adv=20, test_mask=test_mask))
        # Increasing T_adv does not affect very much, smaller gamma and smaller T_adv the better
        out = test_fgsm(model_adv, epsilon=i_eps, test_mask=test_mask)
        history_acc_test["nominal"].append(out[0])
        history_acc_test["fgsm"].append(out[1])
        history_acc_test["ifgsm"].append(
            test_ifgsm(model_adv, loss_fcn, optimizer, epsilon=i_eps,
                       T_adv=20, lr=0.1, test_mask=test_mask))
    plot_graphs(data=history_acc_test)
def main(args):
    """Grid-search alpha weights for a GCN that fuses raw, structural, and
    attribute embeddings; report mean best-test accuracy over 30 runs.

    Structural/attribute features are pre-trained 8-d embeddings loaded from
    ../../pretrained/ and standardized per-row before use.
    """
    # load and preprocess dataset
    data = load_data(args)
    #
    structure_features = np.load('../../pretrained/' + args.dataset +
                                 '_structure_8d.npy')
    attr_features = np.load('../../pretrained/' + args.dataset + '_attr_8d.npy')
    # standardize each embedding row (axis=1) to zero mean / unit variance
    structure_features = preprocessing.scale(structure_features, axis=1,
                                             with_mean=True, with_std=True,
                                             copy=True)
    #structure_features = preprocessing.scale(structure_features, axis=0, with_mean=True,with_std=True,copy=True)
    structure_features = torch.FloatTensor(structure_features).cuda()
    attr_features = preprocessing.scale(attr_features, axis=1, with_mean=True,
                                        with_std=True, copy=True)
    #attr_features = preprocessing.scale(attr_features, axis=0, with_mean=True,with_std=True,copy=True)
    attr_features = torch.FloatTensor(attr_features).cuda()
    in_feats2 = structure_features.shape[1]
    in_feats3 = attr_features.shape[1]
    print(structure_features.shape, attr_features.shape)
    #data.features = preprocessing.scale(data.features, axis=1, with_mean=True,with_std=True,copy=True)
    #data.features = preprocessing.scale(data.features, axis=0, with_mean=True,with_std=True,copy=True)
    #
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats1 = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    # NOTE(review): the pretrained features above are moved to CUDA
    # unconditionally, so args.gpu < 0 will still require a GPU — confirm.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    #alpha2_set = [0,0.001,0.002,0.004,0.006,0.008,0.01,0.02,0.03,0.04,0.05]
    #alpha3_set = [0,0.001,0.002,0.004,0.006,0.008,0.01,0.02,0.03,0.04,0.05]
    alpha2_set = [0.02]
    alpha3_set = [0.03]
    alpha1 = 1
    for alpha2 in alpha2_set:
        for alpha3 in alpha3_set:
            result = []
            # 30 independent restarts per (alpha2, alpha3) pair
            for iter in range(30):
                model = GCN(g, in_feats1, in_feats2, in_feats3, args.n_hidden,
                            n_classes, args.n_layers, F.relu, args.dropout,
                            alpha1, alpha2, alpha3)
                if cuda:
                    model.cuda()
                loss_fcn = torch.nn.CrossEntropyLoss()
                # use optimizer
                optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                             weight_decay=args.weight_decay)
                # initialize graph
                dur = []
                best_val_acc = 0
                best_test_acc = 0
                for epoch in range(args.n_epochs):
                    model.train()
                    if epoch >= 3:
                        t0 = time.time()
                    # forward
                    logits = model(features, structure_features, attr_features)
                    loss = loss_fcn(logits[train_mask], labels[train_mask])
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if epoch >= 3:
                        dur.append(time.time() - t0)
                    # model selection: keep test accuracy at the best val epoch
                    val_acc = evaluate(model, features, structure_features,
                                       attr_features, labels, val_mask)
                    if val_acc >= best_val_acc:
                        best_val_acc = val_acc
                        best_test_acc = evaluate(model, features,
                                                 structure_features,
                                                 attr_features, labels,
                                                 test_mask)
                    #print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                    #      "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                    #      acc, n_edges / np.mean(dur) / 1000))
                #print()
                #acc = evaluate(model, features,dw_features, labels, test_mask)
                #print("Test Accuracy {:.4f}".format(acc))
                result.append(best_test_acc)
                del model
                #print(best_test_acc)
            print(alpha2, alpha3, np.average(result), result)
def main(args):
    """Train a DGL GCN on the Neptune-backed Cora dataset and save the model.

    Selects CPU/GPU from args.gpu, normalizes the graph, trains for
    args.n_epochs, prints per-epoch and final test accuracy, and writes the
    state dict to args.model_path.
    """
    # load and preprocess dataset
    # FIX: was `args.gpu > 0`, which made GPU id 0 fall through to the CPU
    # branch; every sibling script in this file treats any non-negative id
    # as a GPU (`args.gpu < 0` == CPU).
    if args.gpu >= 0:
        cuda = True
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')
        cuda = False
    cora_data = NeptuneCoraDataset(device, valid_ratio=0.1, test_ratio=0.2)
    #cora_data = CoraDataset(device, valid_ratio=0.1, test_ratio=0.2)
    features = cora_data.features
    test_set = cora_data.test_set
    valid_set = cora_data.valid_set
    train_set = cora_data.train_set
    g = cora_data.g
    in_feats = features['h**o'].shape[1]
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, cora_data.n_class, args.n_layers,
                F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # warm-up: skip first 3 epochs when timing
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features['h**o'])
        loss = loss_fcn(logits[train_set[0]], train_set[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features['h**o'], valid_set)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features['h**o'], test_set)
    print("Test accuracy {:.2%}".format(acc))
    torch.save(model.state_dict(), args.model_path)
import torch_geometric.transforms as T

# Load Cora once with normalized features.
# FIX: the original loaded the identical Planetoid dataset twice back-to-back;
# the second load was redundant dead work.
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', dataset)
dataset = Planetoid(path, dataset, T.NormalizeFeatures())
data = dataset[0]

from gcn import GCN
from sage import SAGE

num_nodes = data.x.size(0)
input_dim = data.x.size(1)
hidden_dim = 16
# FIX: was hard-coded to 7 (Cora-specific); derive from the dataset instead
num_classes = dataset.num_classes

model = GCN(input_dim, hidden_dim, num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=0.02, weight_decay=0)

# full-batch training; accuracy is computed over all nodes (no split)
for epoch in range(1, 2000):
    optimizer.zero_grad()
    output = model.forward_(data.x, data.edge_index)
    loss = model.loss(output, data.y)
    loss.backward()
    optimizer.step()
    acc = output.max(1)[1].eq(data.y).sum().item() / num_nodes
    print('epoch=%d loss=%f acc=%f' % (epoch, loss.item(), acc))
def main(args):
    """Train and evaluate a DGL GCN on cora/citeseer/pubmed (new DGL dataset API).

    Chooses the dataset from args.dataset, optionally rebuilds self-loops,
    applies symmetric degree normalization, trains for args.n_epochs, and
    prints per-epoch stats plus final test accuracy.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    # NOTE(review): data.graph is the legacy accessor; n_edges is recomputed
    # from g after the self-loop step below anyway.
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    # add self loop (remove first to avoid duplicates)
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # warm-up: skip first 3 epochs when timing
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), acc,
                     n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def main(args):
    """Train a DGL GCN and report accuracy/precision/recall/F-score.

    Same pipeline as the sibling scripts (load, mask, normalize, train), but
    evaluation returns a full classification-metric tuple plus a per-class
    report on the test split.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # prefer BoolTensor masks on newer torch; fall back to ByteTensor
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (
        n_edges,
        n_classes,
        train_mask.int().sum().item(),
        val_mask.int().sum().item(),
        test_mask.int().sum().item(),
    ))
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata["norm"] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(
        g,
        in_feats,
        args.n_hidden,
        n_classes,
        args.n_layers,
        F.relu,
        args.dropout,
    )
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # warm-up: skip first 3 epochs when timing
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        # validation metrics each epoch (class report discarded here)
        accuracy, precision, recall, fscore, _ = evaluate(
            model, features, labels, val_mask)
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()
    print("=" * 80)
    print()
    # final metrics on the held-out test split, including per-class report
    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask)
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)
def main(args):
    """Train a DGL GCN (optional layernorm) with a step LR schedule and report
    final/best validation and test accuracy.

    args.self_loop and args.use_layernorm arrive as the strings 'True'/'False'
    and are converted to bools here.
    """
    # convert boolean type for args
    assert args.self_loop in ['True', 'False'], [
        "Only True or False for self_loop, get ", args.self_loop
    ]
    assert args.use_layernorm in ['True', 'False'], [
        "Only True or False for use_layernorm, get ", args.use_layernorm
    ]
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    # t0 is module-level: shared with the timing code below
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}, isolated nodes mapped to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)
    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # initialize graph
    record = []
    dur = []
    for epoch in range(args.n_epochs):
        # manual step schedule: divide LR by 10 at 50% and again at 75% of training
        if args.lr_scheduler:
            if epoch == int(0.5 * args.n_epochs):
                for pg in optimizer.param_groups:
                    pg['lr'] = pg['lr'] / 10
            elif epoch == int(0.75 * args.n_epochs):
                for pg in optimizer.param_groups:
                    pg['lr'] = pg['lr'] / 10
        model.train()
        # warm-up: skip first 3 epochs when timing
        if epoch >= 3:
            t0 = time.time()
        # forward
        optimizer.zero_grad()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        # track (val, test) accuracy each epoch for best-epoch reporting
        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        record.append([acc_val, acc_test])
    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
def main(training_file, dev_file, test_file, epochs=None, patience=None,
         num_heads=None, num_out_heads=None, num_layers=None, num_hidden=None,
         residual=None, in_drop=None, attn_drop=None, lr=None,
         weight_decay=None, alpha=None, batch_size=None, graph_type=None,
         net=None, freeze=None, cuda=None, fw=None):
    """Random hyper-parameter search over GNN architectures on SocNav data.

    Every hyper-parameter argument may be None (use the built-in candidate
    list) or a user-supplied value/list, normalized through ``flattenList``.
    Ten trials are attempted; each samples one configuration, trains with
    validation-loss early stopping, saves the best checkpoint, and reports
    test-set score.

    Returns:
        The best (lowest) mean validation loss observed.
    """
    # number of training epochs
    if epochs is None:
        epochs = 400
    print('EPOCHS', epochs)
    # used for early stop
    if patience is None:
        patience = 15
    print('PATIENCE', patience)
    # number of hidden attention heads
    if num_heads is None:
        num_heads_ch = [4, 5, 6, 7]
    else:
        num_heads_ch = flattenList(num_heads)
    print('NUM HEADS', num_heads_ch)
    # number of output attention heads
    if num_out_heads is None:
        num_out_heads_ch = [4, 5, 6, 7]
    else:
        num_out_heads_ch = flattenList(num_out_heads)
    print('NUM OUT HEADS', num_out_heads_ch)
    # number of hidden layers
    if num_layers is None:
        num_layers_ch = [2, 3, 4, 5, 6]
    else:
        num_layers_ch = flattenList(num_layers)
    print('NUM LAYERS', num_layers_ch)
    # number of hidden units
    if num_hidden is None:
        num_hidden_ch = [32, 64, 96, 128, 256, 350, 512]
    else:
        num_hidden_ch = flattenList(num_hidden)
    print('NUM HIDDEN', num_hidden_ch)
    # use residual connection
    if residual is None:
        residual_ch = [True, False]
    else:
        residual_ch = flattenList(residual)
    print('RESIDUAL', residual_ch)
    # input feature dropout
    if in_drop is None:
        in_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        in_drop_ch = flattenList(in_drop)
    print('IN DROP', in_drop_ch)
    # attention dropout
    if attn_drop is None:
        attn_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        attn_drop_ch = flattenList(attn_drop)
    print('ATTENTION DROP', attn_drop_ch)
    # learning rate
    if lr is None:
        lr_ch = [0.0000005, 0.0000015, 0.00001, 0.00005, 0.0001]
    else:
        lr_ch = flattenList(lr)
    print('LEARNING RATE', lr_ch)
    # weight decay
    if weight_decay is None:
        weight_decay_ch = [0.0001, 0.001, 0.005]
    else:
        weight_decay_ch = flattenList(weight_decay)
    print('WEIGHT DECAY', weight_decay_ch)
    # the negative slope of leaky relu
    if alpha is None:
        alpha_ch = [0.1, 0.15, 0.2]
    else:
        alpha_ch = flattenList(alpha)
    print('ALPHA', alpha_ch)
    # batch size used for training, validation and test
    if batch_size is None:
        batch_size_ch = [175, 256, 350, 450, 512, 800, 1600]
    else:
        batch_size_ch = flattenList(batch_size)
    print('BATCH SIZE', batch_size_ch)
    # net type: map user-supplied names to framework-specific classes.
    if net is None:
        net_ch = [GCN, GAT, RGCN, PGCN, PRGCN, GGN, PGAT]
    else:
        net_ch_raw = flattenList(net)
        net_ch = []
        for ch in net_ch_raw:
            if ch.lower() == 'gcn':
                if fw == 'dgl':
                    net_ch.append(GCN)
                else:
                    net_ch.append(PGCN)
            elif ch.lower() == 'gat':
                if fw == 'dgl':
                    net_ch.append(GAT)
                else:
                    net_ch.append(PGAT)
            elif ch.lower() == 'rgcn':
                if fw == 'dgl':
                    net_ch.append(RGCN)
                else:
                    net_ch.append(PRGCN)
            elif ch.lower() == 'ggn':
                net_ch.append(GGN)
            elif ch.lower() == 'rgat':
                net_ch.append(PRGAT)
            else:
                print('Network type {} is not recognised.'.format(ch))
                sys.exit(1)
    print('NET TYPE', net_ch)
    # graph type
    # NOTE(review): ``net_ch`` is a *list*, so ``net_ch in [GCN, GAT, ...]``
    # is True only if the whole list equals one of those classes -- i.e.
    # never.  The else branch is always taken; confirm whether a subset /
    # any() check over the chosen classes was intended.  Preserved as-is.
    if net_ch in [GCN, GAT, PGCN, GGN, PGAT]:
        if graph_type is None:
            graph_type_ch = ['raw', '1', '2', '3', '4', 'relational']
        else:
            graph_type_ch = flattenList(graph_type)
    else:
        if graph_type is None:
            graph_type_ch = ['relational']
        else:
            graph_type_ch = flattenList(graph_type)
    print('GRAPH TYPE', graph_type_ch)
    # Freeze input neurons?
    if freeze is None:
        freeze_ch = [True, False]
    else:
        freeze_ch = flattenList(freeze)
    print('FREEZE', freeze_ch)
    # CUDA?
    if cuda is None:
        device = torch.device("cpu")
    elif cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print('DEVICE', device)
    # FIX: keep the framework candidate list separate from the per-trial
    # pick.  The original did ``fw = random.choice(fw)`` inside the trial
    # loop, overwriting the candidate list with the chosen string, so from
    # trial 2 onward random.choice drew a single *character* (e.g. 'd')
    # instead of a framework name, breaking every later ``fw == 'dgl'`` check.
    if fw is None:
        fw_ch = ['dgl', 'pg']
    else:
        fw_ch = fw
    # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()
    loss_fcn = torch.nn.MSELoss()
    for trial in range(10):
        trial_s = str(trial).zfill(6)
        # Sample one configuration for this trial.
        num_heads = random.choice(num_heads_ch)
        num_out_heads = random.choice(num_out_heads_ch)
        num_layers = random.choice(num_layers_ch)
        num_hidden = random.choice(num_hidden_ch)
        residual = random.choice(residual_ch)
        in_drop = random.choice(in_drop_ch)
        attn_drop = random.choice(attn_drop_ch)
        lr = random.choice(lr_ch)
        weight_decay = random.choice(weight_decay_ch)
        alpha = random.choice(alpha_ch)
        batch_size = random.choice(batch_size_ch)
        graph_type = random.choice(graph_type_ch)
        net_class = random.choice(net_ch)
        freeze = random.choice(freeze_ch)
        fw = random.choice(fw_ch)
        # Map the boolean freeze flag to a count of frozen input neurons,
        # which depends on the graph alternative.
        if freeze == False:
            freeze = 0
        else:
            if graph_type == 'raw' or graph_type == '1' or graph_type == '2':
                freeze = 4
            elif graph_type == '3' or graph_type == '4':
                freeze = 6
            elif graph_type == 'relational':
                freeze = 5
            else:
                sys.exit(1)
        print('=========================')
        print('TRIAL', trial_s)
        print('HEADS', num_heads)
        print('OUT_HEADS', num_out_heads)
        print('LAYERS', num_layers)
        print('HIDDEN', num_hidden)
        print('RESIDUAL', residual)
        print('inDROP', in_drop)
        print('atDROP', attn_drop)
        print('LR', lr)
        print('DECAY', weight_decay)
        print('ALPHA', alpha)
        print('BATCH', batch_size)
        print('GRAPH_ALT', graph_type)
        print('ARCHITECTURE', net_class)
        print('FREEZE', freeze)
        print('FRAMEWORK', fw)
        print('=========================')
        # create the dataset
        print('Loading training set...')
        train_dataset = SocNavDataset(training_file, mode='train',
                                      alt=graph_type)
        print('Loading dev set...')
        valid_dataset = SocNavDataset(dev_file, mode='valid', alt=graph_type)
        print('Loading test set...')
        test_dataset = SocNavDataset(test_file, mode='test', alt=graph_type)
        print('Done loading files')
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                                      collate_fn=collate)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                     collate_fn=collate)
        num_rels = train_dataset.data[0].num_rels
        cur_step = 0
        best_loss = -1  # sentinel: no validation loss recorded yet
        n_classes = train_dataset.labels.shape[1]
        print('Number of classes: {}'.format(n_classes))
        num_feats = train_dataset.features.shape[1]
        print('Number of features: {}'.format(num_feats))
        g = train_dataset.graph
        heads = ([num_heads] * num_layers) + [num_out_heads]
        # define the model
        if fw == 'dgl':
            if net_class in [GCN]:
                model = GCN(g, num_feats, num_hidden, n_classes, num_layers,
                            F.elu, in_drop)
            elif net_class in [GAT]:
                model = net_class(g, num_layers, num_feats, num_hidden,
                                  n_classes, heads, F.elu, in_drop, attn_drop,
                                  alpha, residual, freeze=freeze)
            else:
                # def __init__(self, g, in_dim, h_dim, out_dim, num_rels, num_hidden_layers=1):
                model = RGCN(g, in_dim=num_feats, h_dim=num_hidden,
                             out_dim=n_classes, num_rels=num_rels,
                             feat_drop=in_drop, num_hidden_layers=num_layers,
                             freeze=freeze)
        else:
            if net_class in [PGCN]:
                model = PGCN(
                    num_feats, n_classes, num_hidden, num_layers, in_drop,
                    F.relu,
                    improved=True,  # Compute A-hat as A + 2I
                    bias=True)
            elif net_class in [PRGCN]:
                model = PRGCN(
                    num_feats, n_classes,
                    num_rels,
                    num_rels,  # num_rels? # TODO: Add variable
                    num_hidden, num_layers, in_drop, F.relu, bias=True)
            elif net_class in [PGAT]:
                model = PGAT(num_feats, n_classes, num_heads, in_drop,
                             num_hidden, num_layers, F.relu, concat=True,
                             neg_slope=alpha, bias=True)
            elif net_class in [PRGAT]:
                model = PRGAT(
                    num_feats, n_classes, num_heads,
                    num_rels,
                    num_rels,  # num_rels? # TODO: Add variable
                    num_hidden, num_layers, num_layers, in_drop, F.relu,
                    alpha, bias=True)
            else:
                model = GGN(num_feats, num_layers, aggr='mean', bias=True)
        # Describe the model
        # describe_model(model)
        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=weight_decay)
        model = model.to(device)
        for epoch in range(epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                subgraph.set_n_initializer(dgl.init.zero_initializer)
                subgraph.set_e_initializer(dgl.init.zero_initializer)
                feats = feats.to(device)
                labels = labels.to(device)
                if fw == 'dgl':
                    # DGL models read the graph from attributes.
                    model.g = subgraph
                    for layer in model.layers:
                        layer.g = subgraph
                    logits = model(feats.float())
                else:
                    # PyG models take a Data object; relational nets also
                    # need the edge types.
                    if net_class in [PGCN, PGAT, GGN]:
                        data = Data(x=feats.float(),
                                    edge_index=torch.stack(
                                        subgraph.edges()).to(device))
                    else:
                        data = Data(
                            x=feats.float(),
                            edge_index=torch.stack(
                                subgraph.edges()).to(device),
                            edge_type=subgraph.edata['rel_type'].squeeze().to(
                                device))
                    logits = model(data)
                loss = loss_fcn(logits[getMaskForBatch(subgraph)],
                                labels.float())
                optimizer.zero_grad()
                # Sanity check: snapshot one parameter around the step to
                # detect an optimizer step that changes nothing.
                a = list(model.parameters())[0].clone()
                loss.backward()
                optimizer.step()
                b = list(model.parameters())[0].clone()
                not_learning = torch.equal(a.data, b.data)
                if not_learning:
                    print('Not learning')
                    # sys.exit(1)
                else:
                    pass
                    # print('Diff: ', (a.data-b.data).sum())
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print('Loss: {}'.format(loss_data))
            if epoch % 5 == 0:
                # (the original nested this same condition twice)
                print(
                    "Epoch {:05d} | Loss: {:.4f} | Patience: {} | ".format(
                        epoch, loss_data, cur_step), end='')
            # Validation pass: runs every epoch; printed every 5th.
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                subgraph.set_n_initializer(dgl.init.zero_initializer)
                subgraph.set_e_initializer(dgl.init.zero_initializer)
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph,
                                           labels.float(), loss_fcn, fw,
                                           net_class)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            if epoch % 5 == 0:
                print("Score: {:.4f} MEAN: {:.4f} BEST: {:.4f}".format(
                    mean_score, mean_val_loss, best_loss))
            # early stop on validation loss
            if best_loss > mean_val_loss or best_loss < 0:
                best_loss = mean_val_loss
                # Save the model
                torch.save(model.state_dict(), fw + str(net) + '.tch')
                # 3 4 5 6 7 8 9 10 11 12 13 14 15
                params = [
                    val_loss, graph_type, str(type(net_class)), g, num_layers,
                    num_feats, num_hidden, n_classes, heads, F.elu, in_drop,
                    attn_drop, alpha, residual, num_rels, freeze
                ]
                pickle.dump(params, open(fw + str(net) + '.prms', 'wb'))
                cur_step = 0
            else:
                cur_step += 1
                if cur_step >= patience:
                    break
        torch.save(model, 'gattrial.pth')
        # Final test-set evaluation for this trial.
        test_score_list = []
        for batch, test_data in enumerate(test_dataloader):
            subgraph, feats, labels = test_data
            subgraph.set_n_initializer(dgl.init.zero_initializer)
            subgraph.set_e_initializer(dgl.init.zero_initializer)
            feats = feats.to(device)
            labels = labels.to(device)
            test_score_list.append(
                evaluate(feats, model, subgraph, labels.float(), loss_fcn,
                         fw, net_class)[0])
        print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
        model.eval()
        # NOTE(review): this return sits inside the trial loop, so only the
        # first of the 10 trials ever runs.  Preserved as in the original;
        # confirm whether it should be dedented past the loop.
        return best_loss
# set device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # load dataset dataset = Planetoid(root='/tmp/' + args.dataset, name=args.dataset) data = dataset[0].to(device) # generate model and optimizer with parameter if args.model == 'GCN': model = GCN(dataset.num_features, args.hidden, dataset.num_classes).to(device) else: model = GraphSAGE(dataset.num_features, args.hidden, dataset.num_classes).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) # define two list for plot Accuracy_list = [] Loss_list = [] # train the model model.train() for epoch in range(args.epochs): optimizer.zero_grad() out = model(data.x, data.edge_index) _, pred = model(data.x, data.edge_index).max(dim=1) correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()) acc = correct / int(data.test_mask.sum())
    # Tail of an evaluation routine whose `def` line lies outside this
    # chunk: computes test loss/accuracy and prints elapsed time (`b` is
    # presumably a start timestamp set before this fragment -- confirm).
    output = model(features, adj)
    loss_test = F.nll_loss(output[test_idx], labels[test_idx])
    acc_test = accuracy(output[test_idx], labels[test_idx])
    print('accuracy:{:.2f}, time:{:.4f}'.format(acc_test.item(),
                                                time.time() - b))


# Script section: load the dataset, build the GCN, move all tensors to the
# chosen device, then run training followed by a final test.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
adj, features, labels, train_idx, val_idx, test_idx = load_data(args.dataset)
model = GCN(features.shape[1], args.hidden, labels.shape[1], args.activation,
            args.dropout)
# labels are 2-D here (labels.shape[1] used as class count above) --
# apparently one-hot; argmax converts them to class indices.
_, labels = torch.max(labels, 1)
model = model.to(device)
adj = adj.to(device)
features = features.to(device)
labels = labels.to(device)
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
test_idx = test_idx.to(device)
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)
for epoch in range(args.epochs):
    train(epoch)
test()
def run(rank, world_size, args):
    """Per-process DDP worker: build the dataset and GCN, then train for
    ``args.n_epochs`` epochs, reporting per-epoch timing and peak GPU memory.

    NOTE(review): all evaluation / accuracy aggregation below is commented
    out, so this currently measures training throughput only.
    """
    print('Running DDP on rank', rank, 'world size', world_size)
    setup(rank, world_size, args)
    dev_id = ragdoll.device_id()
    # Synthetic dataset when an input graph or cache dir is given, otherwise
    # the real one.  `rank == 0` is passed as the first argument --
    # presumably an "is master" flag; confirm against Dataset's signature.
    if len(args.input_graph) > 0 or len(args.cached_dir) > 0:
        data = SynDataset(rank == 0, args)
    else:
        data = Dataset(rank == 0, args)
    feat_size = args.feat_size
    features = torch.FloatTensor(data.features).cuda()
    labels = torch.LongTensor(data.labels).cuda()
    train_mask = torch.BoolTensor(data.train_mask).cuda()
    val_mask = torch.BoolTensor(data.val_mask).cuda()
    test_mask = torch.BoolTensor(data.test_mask).cuda()
    n_classes = args.n_classes
    n_nodes = data.n_nodes
    local_n_nodes = data.local_n_nodes
    model = GCN(data.graph, n_nodes, local_n_nodes, True, feat_size,
                args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout, comm_net=args.comm_net)
    model.cuda()
    model = DDP(model, device_ids=[dev_id])
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    optimizer.zero_grad()
    dur = []  # per-epoch durations; first 3 epochs skipped as warm-up
    print("Start training... for {} epochs".format(args.n_epochs))
    for epoch in range(args.n_epochs):
        print('Epoch {} -------------'.format(epoch))
        model.train()
        torch.distributed.barrier()  # align all ranks before timing
        if epoch >= 3:
            t0 = time.time()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        torch.cuda.synchronize()  # flush async CUDA work for accurate timing
        t1 = time.time()
        optimizer.step()
        torch.cuda.synchronize()
        t2 = time.time()
        if epoch >= 3:
            dur.append(time.time() - t0)
        # acc, _, _ = evaluate(model, features, labels, val_mask)
        # print('acc is {}, loss is {}, this epoch using time {}, avg time {}.'.format(
        #     acc, loss.item(), dur[-1] if epoch >= 3 else 0, np.mean(dur)))
        print('Using time to synchronize model', t2 - t1)
        print('Peak memory is {} GB'.format(
            torch.cuda.max_memory_allocated(dev_id) / 1e9))
        # NOTE(review): np.mean(dur) on an empty list (epochs 0-2) yields
        # nan with a RuntimeWarning.
        print('this epoch uses time {} s, avg time {} s.'.format(
            dur[-1] if epoch >= 3 else 0, np.mean(dur)))
    ##acc, corr, total = evaluate(model, features, labels, test_mask)
    ##print('my corr is', corr, 'my total is', total)
    ##corr = torch.Tensor([corr]).cuda(dev_id)
    ##total = torch.Tensor([total]).cuda(dev_id)
    ##corrs, totals = [], []
    ##for i in range(world_size):
    ##    corrs.append(torch.Tensor([0]).cuda(dev_id))
    ##    totals.append(torch.Tensor([0]).cuda(dev_id))
    ##torch.distributed.all_gather(corrs, corr)
    ##torch.distributed.all_gather(totals, total)
    ##print('corrs is', corrs)
    ##print('totals is', totals)
    ##corr = torch.stack(corrs, dim=0).sum(dim=0).item() * 1.0
    ##total = torch.stack(totals, dim=0).sum(dim=0).item() * 1.0
    ##print('Test acc is', corr / total)
    cleanup()
# Script section: COCO training setup.  Relies on names defined earlier in
# the file (`results_folder`, `model`, `num_classes`, and the various
# *_path / *_ann_file / *_pickle_file variables); the actual training loop
# follows beyond this chunk.
os.makedirs(results_folder)
checkpoint_path = results_folder + '/' + 'model.pth'
train_dataset = CocoDataset(train_path, train_ann_file, num_classes)
val_dataset = CocoDataset(val_path, val_ann_file, num_classes)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True,
                          num_workers=1)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                        num_workers=1)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.BCEWithLogitsLoss()
# Pre-computed detections are unpickled from disk.
# NOTE(review): pickle.load assumes these files are trusted input; never
# point these paths at untrusted data.
train_detections = pickle.load(open(train_pickle_file, 'rb'))
val_detections = pickle.load(open(val_pickle_file, 'rb'))
total_train_images = len(train_loader)
total_val_images = len(val_loader)
print('\n')
print(f'Total train images: {total_train_images}')
print(f'Total validation images: {total_val_images}')
print('\n')
print('Training...')
print('-' * 100)