          'acc_val: {:.4f}'.format(acc_val.data.item()),
          'time: {:.4f}s'.format(time.time() - t))
    return loss_val.data.item()


# Train model with early stopping on the validation loss
t_total = time.time()
loss_values = []
bad_counter = 0
best = 1e9
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))
    # Checkpoint every epoch; stale checkpoints are pruned below
    torch.save(model.state_dict(), os.path.join(dump_dir, '{}.pkl'.format(epoch)))

    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

# Remove checkpoints from epochs before the best one
files = glob.glob(os.path.join(dump_dir, '*.pkl'))
for file in files:
    filename = os.path.split(file)[-1]
    epoch_nb = int(filename.split('.')[0])
    if epoch_nb < best_epoch:
        os.remove(file)
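# A minimal follow-up sketch (not in the original snippet): reload the
# surviving checkpoint for `best_epoch` before final evaluation, assuming the
# same `dump_dir` and '{epoch}.pkl' naming used above.
model.load_state_dict(torch.load(os.path.join(dump_dir, '{}.pkl'.format(best_epoch))))
model.eval()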
"loss= {:.4f}".format(loss_test.data.item()), "accuracy= {:.4f}".format(acc_test.data.item())) return acc_test.data.item() #Train model t_total = time.time() loss_values = [] bad_counter = 0 best = args.epochs + 1 best_epoch = 0 for epoch in range(args.epochs): loss_values.append(train(epoch)) torch.save(model.state_dict(), '{}.pkl'.format(epoch)) if loss_values[-1] < best: best = loss_values[-1] best_epoch = epoch bad_counter = 0 else: bad_counter += 1 if bad_counter == args.patience: break files = glob.glob('*.pkl') for file in files: epoch_nb = int(file.split('.')[0]) if epoch_nb < best_epoch:
def train_ray_int(opt, checkpoint_dir=None, data_dir="../data"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    # if opt["num_splits"] > 0:
    #   dataset.data = set_train_val_test_split(
    #     23 * np.random.randint(0, opt["num_splits"]),  # random prime 23 to make the splits 'more' random. Could remove
    #     dataset.data,
    #     num_development=5000 if opt["dataset"] == "CoauthorCS" else 1500)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = g.number_of_edges()

    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'], n_classes, heads, F.elu,
                    opt['in_drop'], opt['attn_drop'], opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'], n_classes, opt['in_drop'], opt)

    model = model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = get_optimizer(opt["optimizer"], parameters, lr=opt["lr"], weight_decay=opt["weight_decay"])

    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    train_this = train
    this_test = test_OGB if opt['dataset'] == 'ogbn-arxiv' else test
    best_time = best_epoch = train_acc = val_acc = test_acc = 0

    for epoch in range(1, opt["epoch"]):
        # loss = train(model, optimizer, data)
        loss = train_this(model, optimizer, features, train_mask, labels)[0].item()
        if opt["no_early"]:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
            best_time = opt['time']
        else:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
        if tmp_val_acc > val_acc:
            best_epoch = epoch
            train_acc = tmp_train_acc
            val_acc = tmp_val_acc
            test_acc = tmp_test_acc
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)
        tune.report(loss=loss, accuracy=val_acc, test_acc=test_acc, train_acc=train_acc,
                    best_time=best_time, best_epoch=best_epoch)
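# A minimal launch sketch (not in the original): `train_ray_int` matches the
# function-trainable signature of Ray Tune's legacy `tune.run` API (config dict
# first, optional `checkpoint_dir`), so it could be driven as below. The search
# space here is illustrative only; the real `opt` needs every key the function
# reads (model, dataset, num_layers, ...).
from functools import partial
from ray import tune

analysis = tune.run(
    partial(train_ray_int, data_dir="../data"),
    config={"lr": tune.loguniform(1e-4, 1e-1)},  # passed to the function as `opt`
    num_samples=4,
    metric="accuracy",
    mode="max")
print(analysis.best_config)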
# Train model
t_total = time.time()
loss_values = []
bad_counter = 0
best = np.inf
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))
    if loss_values[-1] < best:
        if args.cuda:
            model.cpu()
        torch.save(
            model.state_dict(),
            '{}_{}_{}.pkl'.format(model._get_name(), args.dataset, epoch))
        if args.cuda:
            model.cuda()
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        print("Patience {0} exceeded. Best in last {0} epochs is {1:.4f}.".format(
            args.patience, best))
        break

files = glob.glob('{}_{}_*.pkl'.format(model._get_name(), args.dataset))
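# A hedged continuation sketch (the original snippet cuts off after the glob,
# so this is not the project's verbatim code): with the
# '<model>_<dataset>_<epoch>.pkl' naming above, the epoch number has to be
# parsed from the last underscore-separated field before pruning every
# checkpoint other than the best one.
for file in files:
    epoch_nb = int(os.path.splitext(file)[0].split('_')[-1])
    if epoch_nb != best_epoch:
        os.remove(file)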
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


# Train model with early stopping on the validation loss
t_total = time.time()
loss_values = []
bad_counter = 0
best = float('inf')  # initial "best" loss; the first epoch always beats it
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))
    torch.save(model.state_dict(), '{}.pkl'.format(epoch))

    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

# Remove checkpoints from epochs before the best one
files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb < best_epoch:
        os.remove(file)
    iter += 1
    index = content_g[subgraph]['index_subgraph']
    idx_train = content_g[subgraph]['idx_train']
    idx_val = content_g[subgraph]['idx_val']
    adj = content_g[subgraph]['adj']
    adj = torch.FloatTensor(np.array(adj.todense()))

    val_loss, train_loss, train_acc, val_acc = train(
        iter, features[index], adj, labels[index], idx_train, idx_val)
    loss_values.append(val_loss)
    train_loss_list.append(train_loss)
    train_acc_list.append(train_acc)
    val_loss_list.append(val_loss)
    val_acc_list.append(val_acc)

    torch.save(model.state_dict(), '{}.pkl'.format(iter))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = iter
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb < best_epoch:
        os.remove(file)
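# A hedged variant sketch (not the project's code): `adj.todense()` above
# materializes the full subgraph adjacency, which can dominate memory on large
# subgraphs. If the model accepts torch sparse inputs, a scipy COO matrix can
# be converted without densifying. Self-contained demo with a 3-node chain:
import numpy as np
import scipy.sparse as sp
import torch

def to_sparse_tensor(adj_sp):
    """Convert a scipy sparse adjacency to a torch.sparse COO tensor."""
    coo = adj_sp.tocoo()
    indices = torch.from_numpy(np.vstack((coo.row, coo.col))).long()
    values = torch.from_numpy(coo.data).float()
    return torch.sparse_coo_tensor(indices, values, coo.shape)

adj_demo = to_sparse_tensor(sp.coo_matrix(np.array([[0, 1, 0],
                                                    [1, 0, 1],
                                                    [0, 1, 0]], dtype=np.float32)))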
    print('Epoch: {:04d}'.format(epoch + 1),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    print('testing')
    model.eval()
    O = []
    for _ in range(len(x_test)):
        # was x_train[_]: index the test sequences when testing
        x_s = torch.Tensor(x_test[_]).float()[None, :].permute(1, 0).cuda()
        output = model(x_s, adj)
        O.append(output)
    # pickle needs a binary file handle ('wb', not 'w')
    with open('31_365/pred_1_5_1_30_SL1.pkl', 'wb') as f:
        pickle.dump(O, f)


t_total = time.time()
for epoch in range(10):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

test()

print('saving')
# save model name - GCN_nfeat_nhid1..._nclass_batchsize
torch.save(model.state_dict(), '31_365/models/GCN_1_5_1_30_SL1.pt')
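# A minimal reload sketch (not in the original): the state dict saved above can
# be restored into a freshly constructed model of the same architecture before
# calling test() in a separate run.
model.load_state_dict(torch.load('31_365/models/GCN_1_5_1_30_SL1.pt'))
model.eval()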