# Training-loop fragment for a GCN over question/answer credibility pairs.
# NOTE(review): this chunk is truncated — the `try:` has no visible
# `except`/`finally`, and the epoch loop is cut off after `optimizer.step()`.
epoch_accuracy = []
train_accuracy = []
epochs = []
correct = 0
try:
    for epoch in range(1, args.num_epochs + 1):
        train_acc = 0
        length = 0
        loss_train = 0
        for i, (X_train, Y_train) in enumerate(train_loader):
            # Map the sampled question ids from the loader back to the
            # pair ids / 'Credible' labels stored in the dataframe X.
            X_train = torch.tensor(X[X["QuestionId"].isin(X_train)]['PairId'].values)
            Y_train = torch.tensor(X[X["QuestionId"].isin(Y_train)]['Credible'].values)
            gcn_model.train(True)
            gcn_optimizer.zero_grad()
            # Full-graph forward pass; per-batch rows are selected below
            # by indexing with X_train.
            user_gcn_embed = gcn_model(X_Tags_Feature, Adj2)
            user_gcn_embed.squeeze_()
            #print "GCN EMbeddings", user_gcn_embed
            # Batch accuracy: argmax over class scores of the batch rows.
            predicted = torch.max(user_gcn_embed[X_train], 1)[1].data
            train_acc += (Y_train.numpy() == predicted.numpy()).sum()
            length += len(Y_train)
            Y_train = Variable(Y_train, requires_grad=False)
            # Y_train2 (all-ones target) feeds the commented-out second
            # criterion below; currently unused.
            Y_train2 = Variable(torch.FloatTensor(np.ones(len(Y_train))), requires_grad=False)
            predicted = torch.max(user_gcn_embed, 1)[1].data
            loss = criterion(user_gcn_embed[X_train], Y_train)  #+criterion2(SparseMM(Adj3)(torch.unsqueeze(predicted, dim=1).float()).squeeze()[X_train], Y_train2)
            loss.backward()
            gcn_optimizer.step()
# Build sparse feature / support (normalized adjacency) tensors and train
# a GCN with masked loss + L2 weight decay.
# NOTE(review): `feature` is constructed from `i`/`v` *before* they are
# (re)assigned from `supports` on the following lines — in the full file
# there is presumably an earlier `i`/`v` built from `features[0]`/`features[1]`;
# confirm statement order against the original source.
feature = torch.sparse.FloatTensor(i.t(), v, features[2]).to(device)
i = torch.from_numpy(supports[0]).long().to(device)
v = torch.from_numpy(supports[1]).to(device)
support = torch.sparse.FloatTensor(i.t(), v, supports[2]).float().to(device)
print('x :', feature)
print('sp:', support)
num_features_nonzero = feature._nnz()
feat_dim = feature.shape[1]
net = GCN(feat_dim, num_classes, num_features_nonzero)
net.to(device)
optimizer = optim.Adam(net.parameters(), lr=args.learning_rate)
net.train()
for epoch in range(args.epochs):
    # Model takes (feature, support) as a tuple and returns a sequence;
    # only the first element (node logits) is used.
    out = net((feature, support))
    out = out[0]
    loss = masked_loss(out, train_label, train_mask)
    # Manual L2 regularization added on top of the masked loss.
    loss += args.weight_decay * net.l2_loss()
    acc = masked_acc(out, train_label, train_mask)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # NOTE(review): chunk truncated — the periodic-logging branch below
    # has no body in the visible source.
    if epoch % 10 == 0:
def run_statistic(threshold): ''' evaluate on small images result ''' # dataset dataset = IDRiD_sub1_dataset(data_dir) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4) #print('Data: %d'%(len(dataset))) #model model = GCN(4, 512) if use_gpu: model = model.cuda() #model = torch.nn.DataParallel(model).cuda() model.load_state_dict(torch.load(os.path.join(save_dir, model_name))) model.train(False) for i in range(4): y_pred_list = [] y_true_list = [] for idx, data in enumerate(dataloader): images, masks, names = data if use_gpu: images = images.cuda() masks = masks.cuda() images, masks = Variable(images, volatile=True), Variable(masks, volatile=True) #forward outputs = model(images) # statistics outputs = F.sigmoid( outputs).cpu().data #remenber to apply sigmoid befor usage masks = masks.cpu().data #for i in range(len(outputs)): y_pred = outputs[i] y_true = masks[i] y_pred = y_pred.numpy().flatten() y_pred = np.where(y_pred > threshold, 1, 0) y_true = y_true.numpy().flatten() y_pred_list.append(y_pred) y_true_list.append(y_true) #verbose if idx % 5 == 0 and idx != 0: print('\r{:.2f}%'.format(100 * idx / len(dataloader)), end='\r') #print() type_list = ['MA', 'EX', 'HE', 'SE'] precision, recall, f1, _ = precision_recall_fscore_support( np.array(y_true_list).flatten(), np.array(y_pred_list).flatten(), average='binary') print( '{} \nThreshold: {:.2f}\nPrecision: {:.4f}\nRecall: {:.4f}\nF1: {:.4f}' .format(type_list[i], threshold, precision, recall, f1))
# Train a GCN with NLL loss on one-hot labels; evaluates on the
# validation split every epoch.
# NOTE(review): chunk is truncated mid-`print` at the end.
model = GCN(dfeat, nhid, nclass, dropout)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
# Data to cuda?
model = model.to(device)
X = X.to(device)
A = A.to(device)
y = y.to(device)
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
test_idx = test_idx.to(device)
print("Start training the model.............")
start = time.time()
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X, A)
    # y is one-hot: torch.max(..., 1)[1] converts to class indices.
    train_loss = F.nll_loss(output[train_idx], torch.max(y[train_idx], 1)[1])
    train_accuracy = accuracy(output[train_idx], torch.max(y[train_idx], 1)[1])
    train_loss.backward()
    optimizer.step()
    # Validation forward pass (recomputed on the full graph in eval mode).
    model.eval()
    output = model(X, A)
    val_loss = F.nll_loss(output[val_idx], torch.max(y[val_idx], 1)[1])
    val_accuracy = accuracy(output[val_idx], torch.max(y[val_idx], 1)[1])
    print('Epoch: {:04d}'.format(epoch + 1),
          'train loss: {:.4f}'.format(train_loss),
          'train accuracy: {:.4f}%'.format(train_accuracy),
# @Time : 2020/9/28 10:49 AM
# @Author : huangyajian
# @File : train.py
# @Software : PyCharm
# @Comment :
# Reference:**********************************************
from data_loader import gcn_load_data
from model import GCN

# Driver script: builds a GCN whose data loading, optimization and
# evaluation are all encapsulated in the model object, then runs
# train -> update -> validate once per iteration, ending with one test pass.
load_data_function = gcn_load_data
gcn_model = GCN(load_data_function=load_data_function,
                hidden_unit=16,
                learning_rate=0.01,
                weight_decay=5e-4)

cost_val = []  # validation-loss history
epochs = 200
for epoch in range(epochs):
    # One optimization round on the training split.
    train_loss, train_acc = gcn_model.train()
    # print("model loss: {}, model acc: {}".format(train_loss, train_acc))
    gcn_model.update()
    # val step
    val_loss, val_acc = gcn_model.eval()
    cost_val.append(val_loss)
    print(
        "iteration: {}, train_loss: {}, train_acc: {}, val_loss: {}, val_acc: {}"
        .format(epoch, train_loss, train_acc, val_loss, val_acc))

# Final held-out evaluation.
test_loss, test_acc = gcn_model.test()
print("start test, the loss: {}, the acc: {}".format(test_loss, test_acc))
def show_image_sample():
    """Stitch 512x512 tile predictions back into full 2848x4288 images and
    plot image / ground-truth masks / predicted maps for each of 12 images.

    Relies on module-level globals: data_dir, use_gpu, save_dir, model_name,
    IDRiD_sub1_dataset, GCN.
    """
    # dataset
    dataset = IDRiD_sub1_dataset(data_dir)
    #model
    model = GCN(4, 512)
    if use_gpu:
        model = model.cuda()
        #model = torch.nn.DataParallel(model).cuda()
    model.load_state_dict(torch.load(os.path.join(save_dir, model_name)))
    model.train(False)  # inference mode
    for n in range(12):
        #test
        # Full-resolution canvases, (C, H, W).
        full_image = np.zeros((3, 2848, 4288), dtype='float32')
        full_mask = np.zeros((4, 2848, 4288), dtype='float32')
        full_output = np.zeros((4, 2848, 4288), dtype='float32')  #(C, H, W)
        title = ''
        # 9 columns x 6 rows = 54 tiles per image.
        for idx in range(9 * 6 * n, 9 * 6 * (n + 1)):
            image, mask, name = dataset[idx]
            # NOTE(review): this reassignment shadows the outer loop
            # variable `n` — harmless only because `for` re-binds it each
            # outer iteration, but worth renaming.
            n = int(idx / (6 * 9))  #image index
            r = int((idx % (6 * 9)) / 9)  #row
            c = (idx % (6 * 9)) % 9  #column
            title = name[:-8]
            if use_gpu:
                image = image.cuda()
                mask = mask.cuda()
            # volatile=True: legacy (pre-0.4) no-grad inference flag.
            image, mask = Variable(image, volatile=True), Variable(mask, volatile=True)
            #forward
            output = model(image.unsqueeze(0))
            output = F.sigmoid(output)
            output = output[0]
            # NOTE(review): tiles in the last column (c == 8) are never
            # written into the canvases — confirm whether that is intended
            # (the image width 4288 is not a multiple of 512).
            if c < 8:
                if r == 5:
                    # Bottom row: crop the 224 rows that overlap the edge.
                    full_output[:, r * 512:r * 512 + 512 - 224, c * 512:c * 512 + 512] = output.cpu().data.numpy()[:, :-224, :]
                    full_mask[:, r * 512:r * 512 + 512 - 224, c * 512:c * 512 + 512] = mask.cpu().data.numpy()[:, :-224, :]
                    full_image[:, r * 512:r * 512 + 512 - 224, c * 512:c * 512 + 512] = image.cpu().data.numpy()[:, :-224, :]
                else:
                    full_output[:, r * 512:r * 512 + 512, c * 512:c * 512 + 512] = output.cpu().data.numpy()
                    full_mask[:, r * 512:r * 512 + 512, c * 512:c * 512 + 512] = mask.cpu().data.numpy()
                    full_image[:, r * 512:r * 512 + 512, c * 512:c * 512 + 512] = image.cpu().data.numpy()
        # (H, W, C) for matplotlib display.
        full_image = full_image.transpose(1, 2, 0)
        MA = full_output[0]
        EX = full_output[1]
        HE = full_output[2]
        SE = full_output[3]
        plt.figure()
        plt.axis('off')
        plt.suptitle(title)
        plt.subplot(331)
        plt.title('image')
        fig = plt.imshow(full_image)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(332)
        plt.title('ground truth MA')
        fig = plt.imshow(full_mask[0])
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(333)
        plt.title('ground truth EX')
        fig = plt.imshow(full_mask[1])
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(334)
        plt.title('ground truth HE')
        fig = plt.imshow(full_mask[2])
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(335)
        plt.title('ground truth SE')
        fig = plt.imshow(full_mask[3])
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(336)
        plt.title('predict MA')
        fig = plt.imshow(MA)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(337)
        plt.title('predict EX')
        fig = plt.imshow(EX)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(338)
        plt.title('predict HE')
        fig = plt.imshow(HE)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.subplot(339)
        plt.title('predict SE')
        fig = plt.imshow(SE)
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        plt.show()
class ModelRunner:
    """Trains and evaluates a temporal GCN: one graph snapshot per time step,
    with a weighted MSE classification loss plus a temporal penalty that ties
    embeddings of nodes appearing in consecutive snapshots."""

    def __init__(self, params, logger, data_logger=None, epochs_logger=None):
        self._logger = logger
        self._epoch_logger = epochs_logger
        self._data_logger = EmptyLogger() if data_logger is None else data_logger
        self._parameters = params
        self._lr = params["lr"]
        self._is_nni = params['is_nni']  # running under NNI hyperparameter search
        self._device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        # Both the classification loss and the temporal loss share the same
        # weighted-MSE implementation.
        self._mse_loss = self.weighted_mse_loss
        self._temporal_loss = self.weighted_mse_loss
        self.model = GCN(num_of_features=self._parameters["feature_matrices"][0].shape[1],
                         hid_size=self._parameters["hid_size"],
                         num_of_classes=self._parameters["number_of_classes"],
                         activation=self._parameters["activation"],
                         dropout=self._parameters["dropout"])
        self.model = self.model.to(self._device)
        self.opt = self._parameters["optimizer"](self.model.parameters(),
                                                 lr=self._parameters['lr'],
                                                 weight_decay=self._parameters['weight_decay'])

    @property
    def logger(self):
        return self._logger

    @property
    def data_logger(self):
        return self._data_logger

    def weighted_mse_loss(self, pred, target, weights=None):
        """Sum-of-squared-errors loss, optionally re-weighted per class.

        weights: per-class counts (Nj); the scheme is chosen by the
        'loss_weights_type' parameter. With weights=None this is a plain
        (unaveraged) SSE — also how the temporal loss uses it.
        NOTE(review): falls through and returns None for any other
        'loss_weights_type' value.
        """
        if weights is None:
            return ((pred - target) ** 2).sum(dim=1).sum().to(self._device)
        elif self._parameters['loss_weights_type'] == 'sqrt(N/Nj)':
            weights = torch.tensor(weights).to(device=self._device, dtype=torch.float)
            b = (torch.sqrt((weights).sum() / weights) * (pred - target) ** 2).sum(dim=1).sum().to(self._device)
            return b
        elif self._parameters['loss_weights_type'] == '1/Njs':
            weights = torch.tensor(weights).to(device=self._device, dtype=torch.float)
            b = (torch.tensor(1. / weights) * (pred - target) ** 2).sum(dim=1).sum().to(self._device)
            return b

    def run(self):
        """Full training loop; tests on the last epoch, then once more with
        print_to_file=True. Returns (train results, test results, final
        result, parameters)."""
        train_results_l = []
        test_results = []
        # train
        for epoch in range(self._parameters["epochs"]):
            train_results = self.train(epoch)
            train_results_l.append(train_results)
            if epoch == self._parameters["epochs"] - 1:  # for grid
                test_res = self.test(epoch)
                test_results.append(test_res)
                if self._is_nni:
                    nni.report_intermediate_result(test_res["f1_score_macro"][-1])
                else:
                    print(self._parameters["it_num"], self._parameters["iterations"], epoch + 1,
                          self._parameters["epochs"], self._parameters["lr"],
                          self._parameters["dropout"], self._parameters["hid_size"],
                          self._parameters["weight_decay"], self._parameters["temporal_pen"],
                          train_results['loss'].item(), train_results['tempo_loss'].item(),
                          train_results['loss'].item() + train_results['tempo_loss'].item(),
                          train_results['f1_score_macro'][-1], train_results['f1_score_micro'][-1],
                          test_res["loss"], test_res["tempo_loss"],
                          test_res["loss"] + test_res["tempo_loss"],
                          test_res["f1_score_macro"][-1], test_res["f1_score_micro"][-1])
            self._logger.debug('Epoch: {:04d} '.format(epoch + 1) +
                               'lr: {:04f} '.format(self._parameters['lr']) +
                               'dropout: {:04f} '.format(self._parameters['dropout']) +
                               'hid_size: {:04f} '.format(self._parameters['hid_size']) +
                               'weight_decay: {:04f} '.format(self._parameters['weight_decay']) +
                               'temporal_pen: {:04f} '.format(self._parameters['temporal_pen']) +
                               'reg_loss_train: {:.4f} '.format(train_results['loss']) +
                               'temp_loss: {:.4f} '.format(train_results['tempo_loss']))
        result = self.test('test', print_to_file=True)
        if self._is_nni:
            nni.report_final_result(result["f1_score_macro"])
        return train_results_l, test_results, result, self._parameters

    def train(self, epoch):
        """One optimization step over all snapshots (a single backward pass
        on classification loss + temporal penalty). F1 scores are computed
        only on the last epoch."""
        z_vals, outputs = [], []
        labeled_indices = self._parameters['training_inds']
        labels = self._parameters['training_labels']
        tempo_loss = 0.
        loss_train = 0.
        self.model.train()
        self.opt.zero_grad()
        for idx, adj in enumerate(self._parameters["adj_matrices"]):
            input_features = torch.from_numpy(self._parameters["feature_matrices"][idx]).to(dtype=torch.float, device=self._device)
            # z: intermediate embedding; output: final predictions.
            z, output = self.model(input_features, adj)
            output = output[labeled_indices[idx], :]
            # Njs are the weights of the loss using the adj mx.
            # they should be either used or not.
            # Nj_s[j]: total count of label j over all snapshots and nodes.
            # NOTE(review): this is loop-invariant and could be hoisted.
            Nj_s = [sum([labels[u][t][j] for u in range(len(self._parameters["adj_matrices"]))
                         for t in range(len(labels[u]))])
                    for j in range(self._parameters['number_of_classes'])]
            loss_train += self._mse_loss(output, labels[idx], Nj_s)
            # loss_train += self._mse_loss(output, labels[idx].float()) #without weights using the build-in mse
            z_vals.append(z)  # After 1 GCN layer
            outputs.append(output)  # Final predictions
        # counts the number of cross_year_persons
        z_appearances = 0.
        for t in range(len(z_vals) - 1):
            t_inds = self._parameters['training_inds'][t]
            t_plus_one_inds = self._parameters['training_inds'][t + 1]
            # Nodes present in both consecutive snapshots.
            z_inds = [i for i in t_inds if i in t_plus_one_inds]
            z_appearances += len(z_inds)
            z_val_t = z_vals[t][z_inds, :]
            z_val_t_plus_1 = z_vals[t + 1][z_inds, :]
            # Penalize embedding drift between consecutive snapshots.
            loss = self._temporal_loss(z_val_t_plus_1, z_val_t)
            tempo_loss += self._parameters["temporal_pen"] * loss
        tempo_loss /= z_appearances
        loss_train /= sum([len(labeled_indices[u]) for u in range(len(outputs))])
        total_loss = loss_train + tempo_loss
        total_loss.backward()
        self.opt.step()
        f1_score_macro, f1_score_micro = [], []
        if epoch == self._parameters['epochs'] - 1:
            for i in range(len(labels)):
                f1_mac, f1_mic, list_real, list_pred = self.accuracy_f1_score(outputs[i], labels[i])
                f1_score_macro.append(f1_mac)
                f1_score_micro.append(f1_mic)
        result = {"loss": loss_train,
                  "f1_score_macro": f1_score_macro,
                  "f1_score_micro": f1_score_micro,
                  "tempo_loss": tempo_loss}
        return result

    def test(self, epoch, print_to_file=False):
        """Evaluate on the test split; epoch may be an int or the string
        'test' (final call). Optionally pickles per-snapshot outputs."""
        z_vals, outputs = [], []
        labeled_indices = self._parameters['test_inds']
        labels = self._parameters['test_labels']
        tempo_loss = 0.
        loss_test = 0.
        test_z_appearances = 0.
        self.model.eval()
        for idx, adj in enumerate(self._parameters["adj_matrices"]):
            test_mat = torch.from_numpy(self._parameters["feature_matrices"][idx]).to(self._device)
            z, output = self.model(*[test_mat, adj])
            output = output[labeled_indices[idx], :]
            # Unweighted loss on the test side (no Nj_s argument).
            loss_test += self._mse_loss(output, labels[idx].float())
            z_vals.append(z)
            outputs.append(output)
        if print_to_file:
            grid_outputs_folder = str(self._parameters['name'])
            self._logger.debug("\nprint to files")
            for i in range(len(self._parameters["adj_matrices"])):
                np_output = outputs[i].cpu().data.numpy()
                products_path = os.path.join(os.getcwd(), 'dataset', self._parameters["dataset_name"],
                                             "gcn_outputs", grid_outputs_folder)
                if not os.path.exists(products_path):
                    os.makedirs(products_path)
                with open(os.path.join("dataset", self._parameters["dataset_name"], "gcn_outputs",
                                       grid_outputs_folder, "gcn_" + str(i) + ".pkl"), "wb") as f:
                    pickle.dump(np_output, f, protocol=pickle.HIGHEST_PROTOCOL)
        # Temporal penalty on test snapshots (same scheme as train()).
        for t in range(len(z_vals) - 1):
            t_inds = self._parameters['test_inds'][t]
            t_plus_one_inds = self._parameters['test_inds'][t + 1]
            z_inds = [i for i in t_inds if i in t_plus_one_inds]
            test_z_appearances += len(z_inds)
            z_val_t = z_vals[t][z_inds, :]
            z_val_t_plus_1 = z_vals[t + 1][z_inds, :]
            tempo_loss += self._parameters["temporal_pen"] * self._temporal_loss(z_val_t_plus_1, z_val_t)
        tempo_loss /= test_z_appearances
        loss_test /= sum([len(labeled_indices[u]) for u in range(len(outputs))])
        f1_score_macro, f1_score_micro = [], []
        real, pred = [], []
        if epoch == self._parameters['epochs'] - 1 or epoch == 'test':
            for i in range(len(labels)):  # running over the years
                f1_mac, f1_mic, list_real, list_pred = self.accuracy_f1_score(outputs[i], labels[i])
                f1_score_macro.append(f1_mac)
                f1_score_micro.append(f1_mic)
                real.extend(list_real)
                pred.extend(list_pred)
            self.confusion_matrix(real, pred)  # of all years normalized to 1 for the last epoch test
        result = {"loss": loss_test.data.item(),
                  "f1_score_macro": f1_score_macro,
                  "f1_score_micro": f1_score_micro,
                  "tempo_loss": tempo_loss.data.item()}
        return result

    def accuracy_f1_score(self, output, labels):
        """Macro/micro F1 over (node, label) pairs where the multi-label
        target is 1; prediction is the argmax class of the node's output.
        Returns (f1_macro, f1_micro, real_labels, predicted_labels)."""
        pred, real = [], []
        for person in range(labels.size(0)):  # range of all persons
            for label in range(labels.size(1)):
                if labels[person, label] == 0:
                    continue
                else:
                    argmax = output[person].max(0)[1]
                    real.append(label)
                    pred.append(argmax.cpu().item())
        f1_macro = f1_score(real, pred, average='macro')
        f1_micro = f1_score(real, pred, average='micro')
        return f1_macro, f1_micro, real, pred

    def confusion_matrix(self, list_real, list_pred):
        """Build a row-normalized confusion matrix, record its diagonal in
        the parameters dict, and save a heatmap figure to disk."""
        matrix = np.zeros((self._parameters["number_of_classes"], self._parameters["number_of_classes"]))  # classes X classes
        for i in range(len(list_pred)):
            matrix[list_real[i], list_pred[i]] += 1
        row_sums = matrix.sum(axis=1, dtype='float')
        new_matrix = np.zeros((self._parameters["number_of_classes"], self._parameters["number_of_classes"]))  # classes X classes
        for i, (row, row_sum) in enumerate(zip(matrix, row_sums)):
            # Guard against division by zero for classes never seen.
            if row_sum == 0:
                new_matrix[i, :] = 0
            else:
                new_matrix[i, :] = row / row_sum
        new_matrix = np.around(new_matrix, 3)
        b = np.asarray(new_matrix)
        self._parameters['diag_sum'] = np.trace(b)
        self._parameters['diag_elements'] = np.diagonal(b)
        print('Diagonal (sum): ', np.trace(b))
        print('Diagonal (elements): ', np.diagonal(b))
        fig = plt.figure()
        ax = fig.add_subplot(111)
        cax = ax.matshow(new_matrix, interpolation='nearest')
        fig.colorbar(cax)
        ax.set_yticks(plt.np.arange(self._parameters["number_of_classes"]))
        ax.set_yticklabels(i for i in range(self._parameters["number_of_classes"]))
        ax.set_xticks(plt.np.arange(self._parameters["number_of_classes"]))
        ax.set_xticklabels(i for i in range(self._parameters["number_of_classes"]))
        ax.tick_params(axis='y', labelsize=7)
        ax.tick_params(axis='x', labelsize=7, labelbottom=True, labeltop=False)
        plt.title('Confusion matrix')
        ax.axis('image')
        plt.xlabel("Predicted label")
        plt.ylabel("Real label")
        mypath = "./dataset/" + self._parameters["dataset_name"] + "/figures"
        if not os.path.exists(mypath):
            os.makedirs(mypath)
        plt.savefig("./dataset/" + self._parameters["dataset_name"] + "/figures/cofution_matrix" + str(self._parameters['name']) + time.strftime("%Y%m%d_%H%M%S") + ".png")
        plt.clf()
        plt.close()
        return
def train_gcn(training_features, training_adjs, training_labels, eval_features,
              eval_adjs, eval_labels, params, class_weights, activations,
              unary, coeffs, graph_params):
    """Train a GCN on per-graph features/adjacencies with a three-term loss
    (weighted unary + pairwise + binomial regularization, mixed by coeffs),
    evaluating on the eval graphs each epoch with optional early stopping.
    Returns the trained model (best-eval-loss checkpoint when early stopping
    was active).

    NOTE(review): if params["early_stop"] is False, or fewer than ~11 epochs
    run, "tmp_time.pt" is never saved and the final load_state_dict will
    raise — confirm against callers.
    """
    device = torch.device(
        'cuda:1') if torch.cuda.is_available() else torch.device('cpu')
    gcn = GCN(n_features=training_features[0].shape[1],
              hidden_layers=params["hidden_layers"],
              dropout=params["dropout"],
              activations=activations,
              p=graph_params["probability"],
              normalization=params["edge_normalization"])
    gcn.to(device)
    opt = params["optimizer"](gcn.parameters(), lr=params["lr"],
                              weight_decay=params["regularization"])
    n_training_graphs = len(training_labels)
    graph_size = graph_params["vertices"]
    n_eval_graphs = len(eval_labels)
    counter = 0  # For early stopping
    min_loss = None
    for epoch in range(params["epochs"]):
        # -------------------------- TRAINING --------------------------
        training_graphs_order = np.arange(n_training_graphs)
        np.random.shuffle(training_graphs_order)
        for i, idx in enumerate(training_graphs_order):
            training_mat = torch.tensor(training_features[idx], device=device)
            training_adj, training_lbs = map(
                lambda x: torch.tensor(
                    data=x[idx], dtype=torch.double, device=device),
                [training_adjs, training_labels])
            gcn.train()
            opt.zero_grad()
            output_train = gcn(training_mat, training_adj)
            # Pairwise term input: Gram matrix of outputs, shifted by 1/2.
            output_matrix_flat = (
                torch.mm(output_train, output_train.transpose(0, 1)) + 1 / 2).flatten()
            training_criterion = gcn_build_weighted_loss(
                unary, class_weights, training_lbs)
            loss_train = coeffs[0] * training_criterion(output_train.view(output_train.shape[0]), training_lbs) + \
                coeffs[1] * gcn_pairwise_loss(output_matrix_flat, training_adj.flatten()) + \
                coeffs[2] * gcn_binomial_reg(output_train, graph_params)
            loss_train.backward()
            opt.step()
        # -------------------------- EVALUATION --------------------------
        graphs_order = np.arange(n_eval_graphs)
        np.random.shuffle(graphs_order)
        # Flat buffers collecting all eval graphs' outputs (on CPU).
        outputs = torch.zeros(graph_size * n_eval_graphs, dtype=torch.double)
        output_xs = torch.zeros(graph_size**2 * n_eval_graphs,
                                dtype=torch.double)
        adj_flattened = torch.tensor(
            np.hstack([eval_adjs[idx].flatten() for idx in graphs_order]))
        for i, idx in enumerate(graphs_order):
            eval_mat = torch.tensor(eval_features[idx], device=device)
            eval_adj, eval_lbs = map(
                lambda x: torch.tensor(
                    data=x[idx], dtype=torch.double, device=device),
                [eval_adjs, eval_labels])
            gcn.eval()
            output_eval = gcn(eval_mat, eval_adj)
            output_matrix_flat = (
                torch.mm(output_eval, output_eval.transpose(0, 1)) + 1 / 2).flatten()
            output_xs[i * graph_size**2:(i + 1) * graph_size**2] = output_matrix_flat.cpu()
            outputs[i * graph_size:(i + 1) * graph_size] = output_eval.view(
                output_eval.shape[0]).cpu()
        all_eval_labels = torch.tensor(np.hstack(
            [eval_labels[idx] for idx in graphs_order]), dtype=torch.double)
        eval_criterion = gcn_build_weighted_loss(unary, class_weights,
                                                 all_eval_labels)
        loss_eval = (
            coeffs[0] * eval_criterion(outputs, all_eval_labels) +
            coeffs[1] * gcn_pairwise_loss(output_xs, adj_flattened) +
            coeffs[2] * gcn_binomial_reg(outputs, graph_params)).item()
        # Running best eval loss (only applied inside the early-stop branch).
        if min_loss is None:
            current_min_loss = loss_eval
        else:
            current_min_loss = min(min_loss, loss_eval)
        if epoch >= 10 and params[
                "early_stop"]:  # Check for early stopping during training.
            if min_loss is None:
                min_loss = current_min_loss
                torch.save(gcn.state_dict(), "tmp_time.pt")  # Save the best state.
            elif loss_eval < min_loss:
                min_loss = current_min_loss
                torch.save(gcn.state_dict(), "tmp_time.pt")  # Save the best state.
                counter = 0
            else:
                counter += 1
                if counter >= 40:  # Patience for learning
                    break
    # After stopping early, our model is the one with the best eval loss.
    gcn.load_state_dict(torch.load("tmp_time.pt"))
    os.remove("tmp_time.pt")
    return gcn
def train_classification_gcn(self, Adj, features, nfeats, labels, nclasses, train_mask, val_mask, test_mask, args):
    """Train a GCN on a fixed adjacency, keeping the checkpoint with the best
    validation accuracy (checked every 10 epochs, with patience-based early
    stopping), then evaluate that checkpoint on the test split.

    Returns (best_val, test_accu, best_model).
    """
    model = GCN(in_channels=nfeats, hidden_channels=args.hidden,
                out_channels=nclasses, num_layers=args.nlayers,
                dropout=args.dropout2, dropout_adj=args.dropout_adj2,
                Adj=Adj, sparse=args.sparse)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.w_decay)

    # Move everything onto the GPU when one is available.
    if torch.cuda.is_available():
        model = model.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        features = features.cuda()
        labels = labels.cuda()

    best_model = None
    best_val = 0
    best_loss = 0
    best_train_loss = 0
    stale_checks = 0  # validation checks since the last improvement

    for epoch in range(1, args.epochs + 1):
        model.train()
        train_loss, train_accu = self.get_loss_fixed_adj(model, train_mask, features, labels)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Validate only every 10th epoch.
        if epoch % 10:
            continue
        model.eval()
        val_loss, val_accu = self.get_loss_fixed_adj(model, val_mask, features, labels)
        if val_accu > best_val:
            # New best: snapshot the model and its metrics.
            stale_checks = 0
            best_val = val_accu
            best_model = copy.deepcopy(model)
            best_loss = val_loss
            best_train_loss = train_loss
        else:
            stale_checks += 1
        if stale_checks >= args.patience:
            break

    print("Val Loss {:.4f}, Val Accuracy {:.4f}".format(
        best_loss, best_val))
    best_model.eval()
    test_loss, test_accu = self.get_loss_fixed_adj(best_model, test_mask, features, labels)
    print("Test Loss {:.4f}, Test Accuracy {:.4f}".format(
        test_loss, test_accu))
    return best_val, test_accu, best_model
def main(args):
    """Entry point: prepare output dirs and logging, pick the device, load a
    citation dataset, build a GCN or SpGAT model, then either train (with
    val-accuracy early stopping) or run a single test pass.

    Fixes vs. the previous version (test branch only):
      * the forward-pass result was never assigned before `accuracy(output[...])`
      * `spent_time` was read before ever being assigned
      * `logger.d(...)` is not a `logging.Logger` method -> `logger.info(...)`
    """
    # 0. initial setting
    # set environment
    cudnn.benchmark = True
    # Create ckpt/results/log directory tree under args.path (idempotent).
    if not os.path.isdir(os.path.join(args.path, './ckpt')):
        os.mkdir(os.path.join(args.path, './ckpt'))
    if not os.path.isdir(os.path.join(args.path, './results')):
        os.mkdir(os.path.join(args.path, './results'))
    if not os.path.isdir(os.path.join(args.path, './ckpt', args.name)):
        os.mkdir(os.path.join(args.path, './ckpt', args.name))
    if not os.path.isdir(os.path.join(args.path, './results', args.name)):
        os.mkdir(os.path.join(args.path, './results', args.name))
    if not os.path.isdir(os.path.join(args.path, './results', args.name, "log")):
        os.mkdir(os.path.join(args.path, './results', args.name, "log"))

    # set logger: file (timestamped per run) + stderr stream
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(message)s')
    handler = logging.FileHandler(
        os.path.join(
            args.path,
            "results/{}/log/{}.log".format(
                args.name, time.strftime('%c', time.localtime(time.time())))))
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler())
    args.logger = logger

    # set cuda
    if torch.cuda.is_available():
        args.logger.info("running on cuda")
        args.device = torch.device("cuda")
        args.use_cuda = True
    else:
        args.logger.info("running on cpu")
        args.device = torch.device("cpu")
        args.use_cuda = False

    args.logger.info("[{}] starts".format(args.name))

    # 1. load data
    adj, features, labels, idx_train, idx_val, idx_test = load_data()

    # 2. setup
    CORA_NODES = 2708
    CORA_FEATURES = 1433
    CORA_CLASSES = 7
    CITESEER_NODES = 3327
    CITESEER_FEATURES = 3703
    CITESEER_CLASSES = 6
    (num_nodes, feature_dim, classes) = (
        CORA_NODES, CORA_FEATURES, CORA_CLASSES) if args.dataset == 'cora' else (
        CITESEER_NODES, CITESEER_FEATURES, CITESEER_CLASSES)
    args.logger.info("setting up...")
    model = GCN(args, feature_dim, args.hidden, classes, args.dropout) if args.model == 'gcn' else SpGAT(
        args, feature_dim, args.hidden, classes, args.dropout, args.alpha, args.n_heads)
    model.to(args.device)
    loss_fn = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    if args.load:
        # Resume model and optimizer state from a checkpoint.
        loaded_data = load(args, args.ckpt)
        model.load_state_dict(loaded_data['model'])
        optimizer.load_state_dict(loaded_data['optimizer'])

    # 3. train / test
    if not args.test:
        # train
        args.logger.info("starting training")
        train_loss_meter = AverageMeter(args, name="Loss", save_all=True, x_label="epoch")
        val_acc_meter = AverageMeter(args, name="Val Acc", save_all=True, x_label="epoch")
        # Early-stop when val accuracy has not improved for args.patience checks.
        earlystop_listener = val_acc_meter.attach_combo_listener(
            (lambda prev, new: prev.max >= new.max), threshold=args.patience)
        steps = 1
        for epoch in range(1, 1 + args.epochs):
            spent_time = time.time()
            model.train()
            train_loss_tmp_meter = AverageMeter(args)
            # Optionally fast-forward to a given step (when resuming).
            if args.start_from_step is not None:
                if steps < args.start_from_step:
                    steps += 1
                    continue
            optimizer.zero_grad()
            batch = len(idx_train)
            # Full-batch training: one forward pass over the whole graph.
            output = model(features.to(args.device), adj.to(args.device))
            loss = loss_fn(output[idx_train], labels[idx_train].to(args.device))
            loss.backward()
            optimizer.step()
            train_loss_tmp_meter.update(loss, weight=batch)
            steps += 1
            train_loss_meter.update(train_loss_tmp_meter.avg)
            spent_time = time.time() - spent_time
            args.logger.info(
                "[{}] train loss: {:.3f} took {:.1f} seconds".format(
                    epoch, train_loss_tmp_meter.avg, spent_time))

            model.eval()
            spent_time = time.time()
            if not args.fastmode:
                # Recompute the forward pass in eval mode; with fastmode the
                # training-mode output above is reused for validation.
                with torch.no_grad():
                    output = model(features.to(args.device), adj.to(args.device))
            acc = accuracy(output[idx_val], labels[idx_val]) * 100.0
            val_acc_meter.update(acc)
            earlystop = earlystop_listener.listen()
            spent_time = time.time() - spent_time
            args.logger.info(
                "[{}] val acc: {:2.1f} % took {:.1f} seconds".format(
                    epoch, acc, spent_time))
            if steps % args.save_period == 0:
                save(args, "epoch{}".format(epoch), {'model': model.state_dict()})
                train_loss_meter.plot(scatter=False)
                val_acc_meter.plot(scatter=False)
                val_acc_meter.save()
            if earlystop:
                break
    else:
        # test
        args.logger.info("starting test")
        model.eval()
        spent_time = time.time()
        with torch.no_grad():
            # Fix: assign the forward-pass result (previously discarded).
            output = model(features.to(args.device), adj.to(args.device))
        acc = accuracy(output[idx_test], labels[idx_test]) * 100
        spent_time = time.time() - spent_time
        # Fix: logging.Logger has no `.d`; use `.info` like the rest of main().
        logger.info("test acc: {:2.1f} % took {:.1f} seconds".format(
            acc, spent_time))
# Training/validation loop fragment (chunk truncated mid-loop at the end).
# Labels are one-hot; cross-entropy targets come from argmax.
y_val = torch.tensor(y_val, dtype=torch.long, requires_grad=False)
y_test = torch.tensor(y_test, dtype=torch.long, requires_grad=False)
load_from = False  # set to a checkpoint path to resume a saved model
gcn = GCN(embsize, hidsize, nclass, weight_decay)
if load_from:
    gcn = torch.load(load_from)
optimizer = torch.optim.Adam(gcn.parameters(recurse=True), lr=lr, weight_decay=weight_decay)
train_time = time.time()
val_loss_pre = 1e9  # best-so-far trackers (used past the visible chunk, presumably)
val_acc_pre = 0
dec_time = 0
for epoch in range(args.epoch):
    t = time.time()
    gcn.train()
    optimizer.zero_grad()
    output = gcn(features, norm_adj)
    pred = output[train_mask]
    ans = torch.argmax(y_train[train_mask], dim=1)
    loss = F.cross_entropy(pred, ans)
    train_acc = cal_accuracy(output, y_train, train_mask)
    loss.backward()
    optimizer.step()
    #print(torch.min(pred), torch.max(pred))
    # Validation reuses the training-mode forward output (no re-forward
    # after eval()).
    gcn.eval()
    pred = output[val_mask]
    # NOTE(review): validation targets are taken from y_train[val_mask]
    # while accuracy uses y_val — confirm whether y_train holds labels for
    # all nodes or this should be y_val.
    ans = torch.argmax(y_train[val_mask], dim=1)
    val_loss = F.cross_entropy(pred, ans)
    val_acc = cal_accuracy(output, y_val, val_mask)