# NOTE(review): this print looks like the tail of an evaluation routine whose
# header lies above the visible chunk; it is kept verbatim.
print("Test set results:",
      "loss= {:.4f}".format(loss_test.data.item()),
      "r2_test= {:.4f}".format(r2_test),
      "l0_norm_att_test: {}".format(l0_norm_att_test))

# Train model
# Early-stopping loop: a checkpoint is written to `filedir` after every epoch
# and checkpoints older than the best epoch so far are pruned as we go.
t_total = time.time()
loss_values = []
bad_counter = 0
# Start from +inf so the first epoch always becomes the initial best. The old
# sentinel (args.epochs + 1) compared an epoch count with the value returned
# by train(), so a large first value could never be recorded as "best".
best = float('inf')
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))

    torch.save(model.state_dict(), filedir + '/{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    # Stop once `patience` consecutive epochs brought no improvement.
    if bad_counter == args.patience:
        break

    for file in glob.glob(filedir + '/*.pkl'):
        # Parse the epoch from the file name in a separator-independent way
        # and skip unrelated .pkl files instead of crashing inside int().
        stem = os.path.splitext(os.path.basename(file))[0]
        if stem.isdecimal() and int(stem) < best_epoch:
            os.remove(file)
# NOTE(review): these three statements look like the tail of an evaluation
# routine whose header lies above the visible chunk; they are kept verbatim.
loss_test = F.nll_loss(output[idx_test], labels[idx_test])
acc_test = accuracy(output[idx_test], labels[idx_test])
print("Test set results:",
      "loss= {:.4f}".format(loss_test.item()),
      "accuracy= {:.4f}".format(acc_test.item()))

# Train model
# Early-stopping loop: a checkpoint `<epoch>.pkl` is written to the current
# working directory after every epoch and checkpoints older than the best
# epoch so far are pruned as we go.
t_total = time.time()
loss_values = []
bad_counter = 0
# Start from +inf so the first epoch always becomes the initial best. The old
# sentinel (args.epochs + 1) compared an epoch count with the value returned
# by train(), so a large first value could never be recorded as "best".
best = float('inf')
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))

    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    # Stop once `patience` consecutive epochs brought no improvement.
    if bad_counter == args.patience:
        break

    for file in glob.glob('*.pkl'):
        # The glob also matches .pkl files that are not epoch checkpoints;
        # skip those instead of crashing inside int().
        stem = os.path.splitext(os.path.basename(file))[0]
        if stem.isdecimal() and int(stem) < best_epoch:
            os.remove(file)
def train_pipeline(self, adj, features, labels, idx_train, idx_val, idx_test, *args):
    """Train a GAT (or SpGAT) model with early stopping and return it.

    Self-loops are added to ``adj`` before it is passed through
    ``normalize_adj``; sparse inputs are densified and everything is turned
    into tensors. After every epoch a checkpoint ``<epoch>.pkl`` is written
    to the current working directory. Checkpoints other than the best one
    are pruned, and the weights of the epoch with the lowest validation loss
    are reloaded before returning.

    Args:
        adj: Adjacency matrix; must support ``adj + sp.eye(n)``.
        features: Node feature matrix (scipy sparse or array-like).
        labels: Integer class labels; ``labels.max() + 1`` classes are used.
        idx_train: Indices of the training nodes.
        idx_val: Indices of the validation nodes.
        idx_test: Indices of the test nodes (stored on ``self`` only).
        *args: Accepted but not used by this method.

    Returns:
        The trained model, which is also stored as ``self.model``.
    """
    adj = normalize_adj(adj + sp.eye(adj.shape[0]))

    if sp.issparse(adj):
        adj = adj.todense()

    if sp.issparse(features):
        features = features.todense()

    # With networkx, we no longer need to convert from one-hot encoding...
    # labels = np.where(labels)[1]

    adj = torch.FloatTensor(adj)
    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    random.seed(self.args.seed)
    np.random.seed(self.args.seed)
    torch.manual_seed(self.args.seed)
    if self.args.cuda:
        torch.cuda.manual_seed(self.args.seed)

    # Model and optimizer
    if self.args.sparse:
        model = SpGAT(
            nfeat=features.shape[1],
            nhid=self.args.hidden,
            nclass=int(labels.max()) + 1,
            dropout=self.args.dropout,
            nheads=self.args.nb_heads,
            alpha=self.args.alpha,
        )
    else:
        model = GAT(
            nfeat=features.shape[1],
            nhid=self.args.hidden,
            nclass=int(labels.max()) + 1,
            dropout=self.args.dropout,
            nheads=self.args.nb_heads,
            alpha=self.args.alpha,
        )
    optimizer = optim.Adam(model.parameters(), lr=self.args.lr, weight_decay=self.args.weight_decay)

    if self.args.cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    features, adj, labels = Variable(features), Variable(adj), Variable(labels)

    # TODO: Test if these assignments could be moved further down.
    self.adj = adj
    self.features = features
    self.labels = labels
    self.idx_train = idx_train
    self.idx_val = idx_val
    self.idx_test = idx_test

    def checkpoint_epoch(path):
        """Return the epoch encoded in a checkpoint file name, or None."""
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem) if stem.isdecimal() else None

    def train(epoch):
        """Run one optimisation step and return the validation loss."""
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

        if not self.args.fastmode:
            # Evaluate validation set performance separately,
            # deactivates dropout during validation run.
            model.eval()
            output = model(features, adj)

        # Computed outside the branch: previously fastmode left loss_val and
        # acc_val unbound and the print below raised UnboundLocalError. In
        # fastmode the metrics come from the training-mode forward pass.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])
        print(
            "Epoch: {:04d}".format(epoch + 1),
            "loss_train: {:.4f}".format(loss_train.data.item()),
            "acc_train: {:.4f}".format(acc_train.data.item()),
            "loss_val: {:.4f}".format(loss_val.data.item()),
            "acc_val: {:.4f}".format(acc_val.data.item()),
            "time: {:.4f}s".format(time.time() - t),
        )

        return loss_val.data.item()

    # Train model
    t_total = time.time()
    loss_values = []
    bad_counter = 0
    # +inf guarantees the first epoch becomes the initial best; the former
    # sentinel (epochs + 1) compared an epoch count against a loss value.
    best = float("inf")
    best_epoch = 0
    for epoch in range(self.args.epochs):
        loss_values.append(train(epoch))

        torch.save(model.state_dict(), "{}.pkl".format(epoch))
        if loss_values[-1] < best:
            best = loss_values[-1]
            best_epoch = epoch
            bad_counter = 0
        else:
            bad_counter += 1

        # Stop once `patience` consecutive epochs brought no improvement.
        if bad_counter == self.args.patience:
            break

        # Drop checkpoints that can no longer be the best one. Unrelated
        # .pkl files in the directory are skipped instead of crashing int().
        for path in glob.glob("*.pkl"):
            epoch_nb = checkpoint_epoch(path)
            if epoch_nb is not None and epoch_nb < best_epoch:
                os.remove(path)

    # Remove the checkpoints written after the best epoch.
    for path in glob.glob("*.pkl"):
        epoch_nb = checkpoint_epoch(path)
        if epoch_nb is not None and epoch_nb > best_epoch:
            os.remove(path)

    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Restore best model
    print("Loading {}th epoch".format(best_epoch))
    model.load_state_dict(torch.load("{}.pkl".format(best_epoch)))
    self.model = model
    return model
def main():
    """Train and/or evaluate one model per data split, chaining the models.

    For each of ``args.N`` splits the data is loaded, a model is built
    according to ``args.model_name``, optionally initialised from
    ``args.load`` (first split only) or from a copy of the previously
    trained model, trained when ``args.evaluate == 0``, and evaluated twice
    (default call and ``best_or_final='final'``).
    """
    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    output_dir_in_use = os.path.exists(args.output_dir) and os.listdir(args.output_dir)
    if output_dir_in_use:
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    # Seed every random number generator that is in play.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if CUDA:
        args.use_cuda = CUDA
        torch.cuda.manual_seed_all(args.seed)
        torch.backends.cudnn.deterministic = True
    print("args = ", args)

    # The string 'None' marks "no previously trained model yet".
    ori_model = 'None'
    ori_load = True

    # Hyper-parameters that make up the per-run directory name, in order.
    name_fields = ("model_name", "lr", "epochs", "k_factors", "up_bound",
                   "top_n", "att_lr", "hidden", "w1")

    for data_idx in range(args.N):
        # Load data
        (adj, features, labels, idx_train, idx_val, idx_test, test_sub_idx,
         ori_adj, ori_idx_train, ori_idx_valid) = load_data(
            args, data_idx, base_path=args.data_dir, dataset=args.dataset)

        file_name = "_".join(
            "{}_{}".format(field, str(getattr(args, field)))
            for field in name_fields)

        path_parts = [args.output_dir]
        if not args.all_data:
            path_parts.append(str(data_idx))
        path_parts.append(file_name)
        model_path = os.path.join(*path_parts)
        if not os.path.exists(model_path):
            os.makedirs(model_path)

        # Model and optimizer
        n_features = features.shape[1]
        n_classes = int(labels.max()) + 1
        if args.model_name in ("SpGAT_2", "SpGAT2"):
            model = SpGAT_2(nfeat=n_features, nclass=n_classes, config=args)
        else:
            model_cls = SpGAT if args.model_name == "SpGAT" else GAT
            model = model_cls(nfeat=n_features,
                              nhid=args.hidden,
                              nclass=n_classes,
                              dropout=args.dropout,
                              nheads=args.nb_heads,
                              alpha=args.alpha)

        print("load path", args.load)
        # Only the first split is initialised from the user-supplied path.
        if args.load != 'None' and ori_load:
            model = load_model(model, args.load)
            print("model loaded")
            ori_load = False

        # Later splits start from a copy of the previously trained model.
        if ori_model != 'None':
            model = copy.deepcopy(ori_model)
            print("load model from", data_idx - 1)

        print(model.state_dict().keys())

        if CUDA:
            model.cuda()
            features = Variable(features.cuda())
            adj = Variable(adj.cuda())
            labels = Variable(labels.cuda())
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()
            needs_original_graph = (
                "_" in args.model_name
                and not args.all_data
                and data_idx > 0
                and ori_adj is not None
            )
            if needs_original_graph:
                ori_adj = Variable(ori_adj.cuda())
                ori_idx_train = ori_idx_train.cuda()
                ori_idx_valid = ori_idx_valid.cuda()

        loader = Corpus(features, adj, labels, idx_train, idx_val, idx_test,
                        ori_adj, ori_idx_train, ori_idx_valid)

        # Report and re-enable any parameter that is currently frozen.
        for param_name, param in model.named_parameters():
            if not param.requires_grad:
                print("False", param_name)
                param.requires_grad = True

        best_epoch = 0
        if args.evaluate == 0:
            best_epoch = train(model, model_path, loader, data_idx)
            ori_model = copy.deepcopy(model)

        shared_eval_kwargs = dict(best_epoch=best_epoch, test_sub_idx=test_sub_idx)
        evaluate(model, model_path, loader, data_idx, **shared_eval_kwargs)
        evaluate(model, model_path, loader, data_idx,
                 best_or_final='final', **shared_eval_kwargs)

        args.load = os.path.join(model_path, 'trained_final.pth')
# Train model
# Early-stopping loop that keeps only the best checkpoint on disk.
t_total = time.time()
loss_values = []
bad_counter = 0
# Start from +inf so the first epoch is always saved as the initial best. With
# the old sentinel (args.epochs + 1) a large value from train() could mean
# that best.pkl was never written at all.
best = float('inf')
best_epoch = 0

# makedirs also creates the missing parent './saved_models'; the former
# os.mkdir call raised FileNotFoundError when that parent did not exist.
os.makedirs(f'./saved_models/{args.dataset}/', exist_ok=True)

for epoch in range(args.epochs):
    loss_values.append(train(epoch))

    if loss_values[-1] < best:
        torch.save(model.state_dict(), f'./saved_models/{args.dataset}/best.pkl')
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    # Stop once `patience` consecutive epochs brought no improvement.
    if bad_counter == args.patience:
        break

    # No per-epoch checkpoint pruning is needed here: only best.pkl is
    # written, and it is overwritten whenever a better epoch is found.
def train_pipeline(self, *args, custom_function=True, function=None):
    """Load data, train a GAT (or SpGAT) model with early stopping, return it.

    The data is obtained from ``new_load_data``. After every epoch a
    checkpoint ``<epoch>.pkl`` is written to the current working directory.
    Checkpoints other than the best one are pruned, and the weights of the
    epoch with the lowest validation loss are reloaded before returning.

    Args:
        *args: Positional arguments forwarded unchanged to ``new_load_data``.
        custom_function: Forwarded to ``new_load_data``.
        function: Forwarded to ``new_load_data``.

    Returns:
        The trained model, which is also stored as ``self.model``.
    """
    random.seed(self.args.seed)
    np.random.seed(self.args.seed)
    torch.manual_seed(self.args.seed)
    if self.args.cuda:
        torch.cuda.manual_seed(self.args.seed)

    # Load data
    adj, features, labels, idx_train, idx_val, idx_test = new_load_data(
        *args, custom_function=custom_function, function=function)

    # Model and optimizer
    if self.args.sparse:
        model = SpGAT(nfeat=features.shape[1],
                      nhid=self.args.hidden,
                      nclass=int(labels.max()) + 1,
                      dropout=self.args.dropout,
                      nheads=self.args.nb_heads,
                      alpha=self.args.alpha)
    else:
        model = GAT(nfeat=features.shape[1],
                    nhid=self.args.hidden,
                    nclass=int(labels.max()) + 1,
                    dropout=self.args.dropout,
                    nheads=self.args.nb_heads,
                    alpha=self.args.alpha)
    optimizer = optim.Adam(model.parameters(),
                           lr=self.args.lr,
                           weight_decay=self.args.weight_decay)

    if self.args.cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    features, adj, labels = Variable(features), Variable(adj), Variable(labels)

    # TODO: Test if these assignments could be moved further down.
    self.adj = adj
    self.features = features
    self.labels = labels
    self.idx_train = idx_train
    self.idx_val = idx_val
    self.idx_test = idx_test

    def checkpoint_epoch(path):
        """Return the epoch encoded in a checkpoint file name, or None."""
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem) if stem.isdecimal() else None

    def train(epoch):
        """Run one optimisation step and return the validation loss."""
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

        if not self.args.fastmode:
            # Evaluate validation set performance separately,
            # deactivates dropout during validation run.
            model.eval()
            output = model(features, adj)

        # Computed outside the branch: previously fastmode left loss_val and
        # acc_val unbound and the print below raised UnboundLocalError. In
        # fastmode the metrics come from the training-mode forward pass.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])
        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(loss_train.data.item()),
              'acc_train: {:.4f}'.format(acc_train.data.item()),
              'loss_val: {:.4f}'.format(loss_val.data.item()),
              'acc_val: {:.4f}'.format(acc_val.data.item()),
              'time: {:.4f}s'.format(time.time() - t))

        return loss_val.data.item()

    # Train model
    t_total = time.time()
    loss_values = []
    bad_counter = 0
    # +inf guarantees the first epoch becomes the initial best; the former
    # sentinel (epochs + 1) compared an epoch count against a loss value.
    best = float('inf')
    best_epoch = 0
    for epoch in range(self.args.epochs):
        loss_values.append(train(epoch))

        torch.save(model.state_dict(), '{}.pkl'.format(epoch))
        if loss_values[-1] < best:
            best = loss_values[-1]
            best_epoch = epoch
            bad_counter = 0
        else:
            bad_counter += 1

        # Stop once `patience` consecutive epochs brought no improvement.
        if bad_counter == self.args.patience:
            break

        # Drop checkpoints that can no longer be the best one. Unrelated
        # .pkl files in the directory are skipped instead of crashing int().
        for path in glob.glob('*.pkl'):
            epoch_nb = checkpoint_epoch(path)
            if epoch_nb is not None and epoch_nb < best_epoch:
                os.remove(path)

    # Remove the checkpoints written after the best epoch.
    for path in glob.glob('*.pkl'):
        epoch_nb = checkpoint_epoch(path)
        if epoch_nb is not None and epoch_nb > best_epoch:
            os.remove(path)

    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Restore best model
    print('Loading {}th epoch'.format(best_epoch))
    model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))
    self.model = model
    return model
def __init__(self, graph, sparse=False, epochs=200, learning_rate=0.005,
             weight_decay=5e-4, hidden=8, nb_heads=8, drop_out=0.6,
             alpha=0.2, patience=100, train=1500, val=2000, test=3100):
    """Store the hyper-parameters, then train and test a GAT/SpGAT model.

    Construction runs the whole pipeline: ``self.load_data()`` is called,
    the model is trained with early stopping on the validation loss (one
    ``<epoch>.pkl`` checkpoint per epoch in the current working directory),
    the best checkpoint is reloaded and the test metrics are printed.

    Args:
        graph: Input graph, stored as ``self.graph``.
        sparse: Use ``SpGAT`` when true, ``GAT`` otherwise.
        epochs: Maximum number of training epochs.
        learning_rate: Learning rate passed to Adam.
        weight_decay: Weight decay passed to Adam.
        hidden: ``nhid`` passed to the model.
        nb_heads: ``nheads`` passed to the model.
        drop_out: ``dropout`` passed to the model.
        alpha: ``alpha`` passed to the model.
        patience: Epochs without improvement before training stops.
        train, val, test: Stored as attributes; presumably split settings
            consumed by ``load_data`` (confirm there).
    """
    self.graph = graph
    self.sparse = sparse
    self.epochs = epochs
    self.learning_rate = learning_rate
    self.weight_decay = weight_decay
    self.hidden = hidden
    self.nb_heads = nb_heads
    self.drop_out = drop_out
    self.alpha = alpha
    self.patience = patience
    self.train = train
    self.val = val
    self.test = test

    # NOTE(review): self.features, self.adj and self.labels are read below
    # but never assigned here; presumably load_data() sets them.
    idx_train, idx_val, idx_test = self.load_data()

    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    if self.sparse:
        model = SpGAT(nfeat=self.features.shape[1],
                      nhid=self.hidden,
                      nclass=int(self.labels.max()) + 1,
                      dropout=self.drop_out,
                      nheads=self.nb_heads,
                      alpha=self.alpha)
    else:
        model = GAT(nfeat=self.features.shape[1],
                    nhid=self.hidden,
                    nclass=int(self.labels.max()) + 1,
                    dropout=self.drop_out,
                    nheads=self.nb_heads,
                    alpha=self.alpha)
    optimizer = optim.Adam(model.parameters(),
                           lr=self.learning_rate,
                           weight_decay=self.weight_decay)

    # Use the GPU (currently disabled)
    # device = torch.device("cuda:0")
    # torch.cuda.empty_cache()
    # model.to(device)
    # self.features = self.features.to(device)
    # self.adj = self.adj.to(device)
    # self.labels = self.labels.to(device)
    # idx_train = idx_train.to(device)
    # idx_val = idx_val.to(device)
    # idx_test = idx_test.to(device)

    features, adj, labels = Variable(self.features), Variable(self.adj), Variable(self.labels)

    t_total = time.time()
    loss_values = []
    bad_counter = 0
    # +inf guarantees the first epoch becomes the initial best; the former
    # sentinel (epochs + 1) compared an epoch count against a loss value.
    best = float('inf')
    best_epoch = 0

    for epoch in range(self.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

        # Second forward pass in eval mode for the validation metrics.
        model.eval()
        output = model(features, adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])
        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

        # Keep plain floats rather than tensors in the history.
        loss_values.append(loss_val.item())
        torch.save(model.state_dict(), '{}.pkl'.format(epoch))
        if loss_values[-1] < best:
            best = loss_values[-1]
            best_epoch = epoch
            bad_counter = 0
        else:
            bad_counter += 1

        # Stop once `patience` consecutive epochs brought no improvement.
        if bad_counter == self.patience:
            break

        # Drop checkpoints older than the best epoch. Unrelated .pkl files
        # in the directory are skipped instead of crashing inside int().
        for file in glob.glob('*.pkl'):
            stem = os.path.splitext(os.path.basename(file))[0]
            if stem.isdecimal() and int(stem) < best_epoch:
                os.remove(file)

    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Restore the best checkpoint and report the test metrics.
    print('Loading {}th epoch'.format(best_epoch))
    model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    # Keep the trained model; it was previously discarded when __init__
    # returned.
    self.model = model