def main(args):
    G = nx.read_edgelist(args.classifydir + '_edgelist.txt', nodetype=int)
    model = Model(nx.number_of_nodes(G), args.num_parts)
    adj = Variable(torch.FloatTensor(nx.adjacency_matrix(G).toarray()),
                   requires_grad=False)
    if torch.cuda.is_available():
        model = model.cuda()
        adj = adj.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    for epoch in tqdm(range(args.num_epochs)):
        model.zero_grad()
        super_adj = model(adj, temp=args.temp, hard=args.hard, beta=args.beta)
        loss = model.loss(super_adj, balance_node=args.balance_node, lam=args.lam)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        if epoch % 50 == 0:
            print("loss:", loss.item())
    vectors = embed_arr_2_dict(model.params.cpu().detach().numpy(), G)
    accs = classify(vectors, args)
    print("micro:", accs['micro'], "macro:", accs['macro'])
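# A minimal sketch of the `embed_arr_2_dict` helper used above; it is not
# defined in this file. From its call site it plausibly maps row i of the
# embedding matrix to node i of G so that `classify` can look vectors up by
# node id. The name and behavior below are assumptions, not the original code.
def embed_arr_2_dict(embed_arr, G):
    # nodes are ints (nodetype=int), so sort them to align with matrix rows
    return {node: embed_arr[idx] for idx, node in enumerate(sorted(G.nodes()))}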
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # load data
    print('==> Preparing dataset %s' % args.dataset)
    features, landmarks, labels = pickle_2_img_and_landmark(args.dataset_path)
    num_classes = 6

    # Model
    print("==> creating model '{}'".format(args.arch))
    model = Model(36, 6, {}, False, dropout=0.3)
    model = model.cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'ckp-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'),
                        title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stat.log'), title=title)
        logger.set_names(['fold_num', 'Learning Rate', 'Train Loss',
                          'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    # logging
    logging.basicConfig(level=logging.DEBUG,
                        filename=os.path.join(args.checkpoint, 'log_info.log'),
                        filemode='a+',
                        format="%(asctime)-15s %(levelname)-8s %(message)s")
    # log configuration
    logging.info('-' * 10 + 'configuration' + '*' * 10)
    for arg in vars(args):
        logging.info((arg, str(getattr(args, arg))))

    acc_fold = []
    reset_lr = state['lr']
    for f_num in range(args.folds):
        state['lr'] = reset_lr
        model.reset_all_weights()
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
        print(args.lr)

        # save each fold's acc and reset configuration
        average_acc = 0
        best_acc = 0

        # 10-fold cross validation
        train_x, train_lm, train_y = [], [], []
        test_x, test_lm, test_y = [], [], []
        for id_fold in range(args.folds):
            if id_fold == f_num:
                test_x = features[id_fold]
                test_lm = landmarks[id_fold]
                test_y = labels[id_fold]
            else:
                train_x = train_x + features[id_fold]
                train_lm = train_lm + landmarks[id_fold]
                train_y = train_y + labels[id_fold]

        # convert arrays to tensors
        train_x = torch.tensor(train_x, dtype=torch.float) / 255.0  # (b_s, 128, 128)
        train_x = train_x.unsqueeze(1)  # (b_s, 1, 128, 128)
        train_lm = np.stack(train_lm)
        # only raw coordinates are needed; no normalization
        train_lm = torch.tensor(train_lm, dtype=torch.float)
        test_x = torch.tensor(test_x, dtype=torch.float) / 255.0
        test_x = test_x.unsqueeze(1)
        # only raw coordinates are needed; no normalization
        test_lm = torch.tensor(test_lm, dtype=torch.float)
        train_y, test_y = torch.tensor(train_y), torch.tensor(test_y)

        train_dataset = torch.utils.data.TensorDataset(train_x, train_lm, train_y)
        train_iter = torch.utils.data.DataLoader(dataset=train_dataset,
                                                 batch_size=args.train_batch,
                                                 shuffle=True)
        test_dataset = torch.utils.data.TensorDataset(test_x, test_lm, test_y)
        test_iter = torch.utils.data.DataLoader(dataset=test_dataset,
                                                batch_size=args.test_batch,
                                                shuffle=False)
        # test for fold order
        print(len(test_dataset))

        if args.evaluate:
            print('\nEvaluation only')
            test_loss, test_acc = test(train_x + test_x, train_y + test_y,
                                       model, criterion, start_epoch, use_cuda)
            print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
            continue

        # Train and val
        for epoch in range(start_epoch, args.epochs):
            # adjust the learning rate at the scheduled epochs
            adjust_learning_rate(optimizer, epoch)
            print('\nEpoch: [%d | %d] LR: %f' %
                  (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))
            train_loss, train_acc = train(train_iter, model, criterion,
                                          optimizer, epoch, use_cuda)
            test_loss, test_acc = test(test_iter, model, criterion, epoch, use_cuda)

            # append logger file
            logger.append([f_num, state['lr'], train_loss, test_loss,
                           train_acc, test_acc])

            # save model
            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }, is_best, f_num, checkpoint=args.checkpoint)

        # compute average acc
        acc_fold.append(best_acc)
        average_acc = sum(acc_fold) / len(acc_fold)
        logging.info('fold: %d, best_acc: %.2f, average_acc: %.2f' %
                     (f_num, best_acc, average_acc))

    logger.close()
    savefig(os.path.join(args.checkpoint, 'log.eps'))
    logging.info('acc_fold' + str(acc_fold))
    print('average acc:')
    print(average_acc)
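# A minimal sketch of the `adjust_learning_rate` helper called in the epoch
# loop above; its real schedule lives elsewhere in the repo. A common pattern
# (assumed here, not confirmed by this file) is to decay `state['lr']` by
# `args.gamma` at the epochs listed in `args.schedule` (both hypothetical
# fields) and push the new value into the optimizer:
def adjust_learning_rate(optimizer, epoch):
    global state
    if epoch in args.schedule:  # e.g. [30, 60, 90]
        state['lr'] *= args.gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = state['lr']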
def train(**kwargs):
    if 'dataset' not in kwargs:
        opt = getattr(config, 'Office_Products_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    # 2 model
    model = Model(opt, getattr(methods, opt.model))
    if opt.use_gpu:
        model.cuda()
        if len(opt.gpu_ids) > 0:
            model = nn.DataParallel(model, device_ids=opt.gpu_ids)
    if opt.load_ckp:
        assert len(opt.ckp_path) > 0
        model.load(opt.ckp_path)

    # 3 data
    train_data = AmazonData(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data,
                                   batch_size=opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)
    test_data = AmazonData(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('{}: train data: {}; test data: {}'.format(now(), len(train_data),
                                                     len(test_data)))

    # 4 optimizer
    optimizer = optim.Adam(model.parameters(), lr=opt.lr,
                           weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    # training
    print("start training....")
    min_loss = 1e+20
    best_res = 1e+10
    mse_func = nn.MSELoss()
    mae_func = nn.L1Loss()
    smooth_mae_func = nn.SmoothL1Loss()

    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        total_maeloss = 0.0
        model.train()
        print("{} Epoch {}: start".format(now(), epoch))
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            if opt.use_gpu:
                scores = torch.FloatTensor(scores).cuda()
            else:
                scores = torch.FloatTensor(scores)
            train_datas = unpack_input(opt, train_datas)

            optimizer.zero_grad()
            output = model(train_datas)

            mse_loss = mse_func(output, scores)
            total_loss += mse_loss.item() * len(scores)
            mae_loss = mae_func(output, scores)
            total_maeloss += mae_loss.item()
            smooth_mae_loss = smooth_mae_func(output, scores)

            if opt.update_method == 'mse':
                loss = mse_loss
            if opt.update_method == 'rmse':
                loss = torch.sqrt(mse_loss) / 2.0
            if opt.update_method == 'mae':
                loss = mae_loss
            if opt.update_method == 'smooth_mae':
                loss = smooth_mae_loss

            loss.backward()
            optimizer.step()

            if opt.fine_step:
                if idx % opt.print_step == 0 and idx > 0:
                    print("\t{}, {} step finished;".format(now(), idx))
                    predict_loss, test_mse = predict(model, test_data_loader,
                                                     opt, use_gpu=opt.use_gpu)
                    if predict_loss < min_loss:
                        model.save(name=opt.dataset, opt=opt.print_opt)
                        min_loss = predict_loss
                        print("\tmodel save")
                    if test_mse < best_res:
                        best_res = test_mse

        scheduler.step(epoch)
        print("{};epoch:{};total_loss:{}".format(now(), epoch, total_loss))
        mse = total_loss * 1.0 / len(train_data)
        mae = total_maeloss * 1.0 / len(train_data)
        print("{};train result: mse: {}; rmse: {}; mae: {}".format(
            now(), mse, math.sqrt(mse), mae))

        predict_loss, test_mse = predict(model, test_data_loader, opt,
                                         use_gpu=opt.use_gpu)
        if predict_loss < min_loss:
            # model.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = predict_loss
            print("model save")
        if test_mse < best_res:
            best_res = test_mse

    print("----" * 20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res: {best_res}")
    print("----" * 20)
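# Sketch of the `predict` evaluation helper used above. Its definition is not
# in this excerpt; from the call sites it returns a selection loss and the raw
# test MSE. Everything below is an assumption about that contract (here both
# returns are the MSE), not the original implementation.
def predict(model, data_loader, opt, use_gpu=False):
    mse_func = nn.MSELoss(reduction='sum')
    model.eval()
    total_loss, n = 0.0, 0
    with torch.no_grad():
        for test_datas, scores in data_loader:
            scores = torch.FloatTensor(scores).cuda() if use_gpu \
                else torch.FloatTensor(scores)
            output = model(unpack_input(opt, test_datas))
            total_loss += mse_func(output, scores).item()
            n += len(scores)
    model.train()
    mse = total_loss / n
    return mse, mse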
def train_model(args):
    model = Model(node_embeddings, args.node_out_dim)
    if CUDA:
        model.cuda()

    if args.is_test:
        model.load_state_dict(
            torch.load('./checkpoints/{0}/trained_{1}.pth'.format(
                args.data, args.test_check)))
        get_test_score(model)
        return

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000,
                                                gamma=0.5, last_epoch=-1)
    gat_loss_func = torch.nn.MSELoss()

    epoch_losses = []  # losses of all epochs
    print("Number of epochs {}".format(args.epochs))

    model.train()
    for epoch in range(args.epochs + 1):
        random.shuffle(Corpus_.train_edge_data)
        random.shuffle(Corpus_.train_neg_data)
        Corpus_.train_indices = np.array(
            list(Corpus_.train_edge_data)).astype(np.int32)
        Corpus_.train_neg_indices = np.array(
            list(Corpus_.train_neg_data)).astype(np.int32)

        start_time = time.time()
        epoch_loss = []

        if Corpus_.num_nodes % 500 == 0:
            num_iters_per_epoch = Corpus_.num_nodes // 500
        else:
            num_iters_per_epoch = (Corpus_.num_nodes // 500) + 1

        for iters in range(num_iters_per_epoch):
            start_time_iter = time.time()
            train_indices, train_indices_neg = Corpus_.get_iteration_batch(0)
            if CUDA:
                train_indices = Variable(torch.LongTensor(train_indices)).cuda()
                train_indices_neg = Variable(
                    torch.LongTensor(train_indices_neg)).cuda()
            else:
                train_indices = Variable(torch.LongTensor(train_indices))

            optimizer.zero_grad()
            node_embeds = model()
            loss = batch_gat_loss(gat_loss_func, train_indices,
                                  train_indices_neg, node_embeds)

            if SP_LOSS:
                neighbor_spectrum_loss = get_neighbor_spectrum_loss(
                    iters, Corpus_.neighbors, Corpus_.neighbors_count,
                    node_embeds, num_iters_per_epoch)
                (loss + float(args.regterm) * neighbor_spectrum_loss).backward()
            else:
                loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data.item())
            end_time_iter = time.time()

        scheduler.step()
        print("Epoch {} , average loss {} , epoch_time {}\n".format(
            epoch, sum(epoch_loss) / len(epoch_loss),
            time.time() - start_time))
        epoch_losses.append(sum(epoch_loss) / len(epoch_loss))

        if epoch > 0 and epoch % 100 == 0:
            save_model(model, epoch, args.data)

    model.load_state_dict(
        torch.load('./checkpoints/{0}/trained_{1}.pth'.format(
            args.data, args.epochs)))
    get_test_score(model)
def train_eval(args):
    logging_config(folder=args.save_dir, name='log{:d}'.format(args.save_id),
                   no_console=False)
    logging.info(args)

    ### check context
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)

    ### load data
    dataset = DataLoader(data_name=args.data_name, seed=args.seed)
    print(dataset)
    model = Model(use_KG=True,
                  input_node_dim=args.entity_embed_dim,
                  gnn_model=args.gnn_model,
                  num_gnn_layers=args.gnn_num_layer,
                  n_hidden=args.gnn_hidden_size,
                  dropout=args.dropout_rate,
                  n_entities=dataset.n_KG_entity,
                  n_relations=dataset.n_KG_relation,
                  relation_dim=args.relation_embed_dim,
                  reg_lambda_kg=args.regs,
                  reg_lambda_gnn=args.regs)
    if use_cuda:
        model.cuda()
    logging.info(model)

    ### optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    valid_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg', 'is_best'], ['%d', '%.5f', '%.5f', '%d'],
        os.path.join(args.save_dir, 'valid{:d}.csv'.format(args.save_id)))
    test_metric_logger = MetricLogger(
        ['epoch', 'recall', 'ndcg'], ['%d', '%.5f', '%.5f'],
        os.path.join(args.save_dir, 'test{:d}.csv'.format(args.save_id)))

    best_epoch = -1
    best_recall = 0.0

    train_g = dataset.train_g
    nid_th = th.LongTensor(train_g.ndata["id"])
    etype_th = th.LongTensor(train_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    train_g.ndata['id'] = nid_th
    train_g.edata['type'] = etype_th

    test_g = dataset.test_g
    nid_th = th.LongTensor(test_g.ndata["id"])
    etype_th = th.LongTensor(test_g.edata["type"])
    if use_cuda:
        nid_th, etype_th = nid_th.cuda(), etype_th.cuda()
    test_g.ndata['id'] = nid_th
    test_g.edata['type'] = etype_th

    item_id_range = th.LongTensor(dataset.item_id_range).cuda() if use_cuda \
        else th.LongTensor(dataset.item_id_range)

    for epoch in range(1, args.max_epoch + 1):
        ### train kg
        time1 = time()
        kg_sampler = dataset.KG_sampler(batch_size=args.batch_size_kg)
        iter = 0
        total_loss = 0.0
        for h, r, pos_t, neg_t, _ in kg_sampler:
            iter += 1
            model.train()
            h_th = th.LongTensor(h)
            r_th = th.LongTensor(r)
            pos_t_th = th.LongTensor(pos_t)
            neg_t_th = th.LongTensor(neg_t)
            if use_cuda:
                h_th, r_th, pos_t_th, neg_t_th = \
                    h_th.cuda(), r_th.cuda(), pos_t_th.cuda(), neg_t_th.cuda()
            loss = model.transR(h_th, r_th, pos_t_th, neg_t_th)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for KGE: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        ### train GNN
        if args.use_attention:
            time1 = time()
            print("Compute attention weight in train ...")
            with th.no_grad():
                A_w = model.compute_attention(train_g)
            train_g.edata['w'] = A_w
            print("Time: {:.2f}s".format(time() - time1))

        time1 = time()
        cf_sampler = dataset.CF_pair_sampler(batch_size=args.batch_size)
        iter = 0
        total_loss = 0.0
        for user_ids, item_pos_ids, item_neg_ids, _ in cf_sampler:
            iter += 1
            model.train()
            user_ids_th = th.LongTensor(user_ids)
            item_pos_ids_th = th.LongTensor(item_pos_ids)
            item_neg_ids_th = th.LongTensor(item_neg_ids)
            if use_cuda:
                user_ids_th, item_pos_ids_th, item_neg_ids_th = \
                    user_ids_th.cuda(), item_pos_ids_th.cuda(), \
                    item_neg_ids_th.cuda()
            embedding = model.gnn(train_g, train_g.ndata['id'])
            loss = model.get_loss(embedding, user_ids_th, item_pos_ids_th,
                                  item_neg_ids_th)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            if (iter % args.print_every) == 0 or iter == 1:
                logging.info("Epoch {:04d} Iter {:04d} | Loss {:.4f} ".format(
                    epoch, iter, total_loss / iter))
        logging.info('Time for GNN: {:.1f}s, loss {:.4f}'.format(
            time() - time1, total_loss / iter))

        if epoch % args.evaluate_every == 0:
            time1 = time()
            val_recall, val_ndcg = eval(model, train_g,
                                        dataset.train_user_dict,
                                        dataset.valid_user_dict,
                                        item_id_range, use_cuda,
                                        args.use_attention)
            info = "Epoch{}, [{:.1f}s] val recall:{:.5f}, val ndcg:{:.5f}".format(
                epoch, time() - time1, val_recall, val_ndcg)
            # save best model
            if val_recall > best_recall:
                valid_metric_logger.log(epoch=epoch, recall=val_recall,
                                        ndcg=val_ndcg, is_best=1)
                best_recall = val_recall
                best_epoch = epoch
                time1 = time()
                test_recall, test_ndcg = eval(model, test_g,
                                              dataset.train_valid_user_dict,
                                              dataset.test_user_dict,
                                              item_id_range, use_cuda,
                                              args.use_attention)
                test_metric_logger.log(epoch=epoch, recall=test_recall,
                                       ndcg=test_ndcg)
                info += "\t[{:.1f}s] test recall:{:.5f}, test ndcg:{:.5f}".format(
                    time() - time1, test_recall, test_ndcg)
            else:
                valid_metric_logger.log(epoch=epoch, recall=val_recall,
                                        ndcg=val_ndcg, is_best=0)
                recall, ndcg = eval(model, test_g,
                                    dataset.train_valid_user_dict,
                                    dataset.test_user_dict, item_id_range,
                                    use_cuda, args.use_attention)
                print("test recall:{}, test_ndcg: {}".format(recall, ndcg))
            logging.info(info)

    logging.info(
        "Final test recall:{:.5f}, test ndcg:{:.5f}, best epoch:{}".format(
            test_recall, test_ndcg, best_epoch))
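# For context, a minimal sketch of the pairwise ranking objective that
# `model.get_loss` typically implements in this kind of CF model (BPR loss
# over user/positive-item/negative-item triples). This is an assumption about
# the model's internals, not code from this repository.
def bpr_loss(embedding, user_ids, pos_ids, neg_ids):
    u = embedding[user_ids]
    pos_score = (u * embedding[pos_ids]).sum(dim=1)
    neg_score = (u * embedding[neg_ids]).sum(dim=1)
    # maximize the margin between positive and negative item scores
    return -th.log(th.sigmoid(pos_score - neg_score) + 1e-10).mean()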
def main(args):
    # get dataframe
    df = get_df(args.groups)

    # get adaptive margin
    tmp = np.sqrt(
        1 / np.sqrt(df['label_group'].value_counts().sort_index().values))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

    # get augmentations
    transforms_train, transforms_val = get_transforms(args.image_size, args.stage)

    # get train and valid dataset
    df_train = df[df['fold'] != args.fold] if not args.full else df
    df_train['label_group'] = LabelEncoder().fit_transform(df_train.label_group)
    df_valid = df[df['fold'] == args.fold]

    out_dim = df_train.label_group.nunique()
    print(f"out_dim = {out_dim}")

    dataset_train = ShoppeDataset(df_train, 'train', transform=transforms_train)
    dataset_valid = ShoppeDataset(df_valid, 'val', transform=transforms_val)
    print(f'Train on {len(df_train)} images, validate on {len(df_valid)} images')

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               shuffle=True,
                                               drop_last=True)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)

    loss_config = decode_config(args.loss_config)

    # model
    if args.enet_type == 'resnest50':
        model = Resnest50(out_dim=out_dim, loss_config=loss_config, args=args)
    else:
        model = Model(args.enet_type, out_dim=out_dim,
                      loss_config=loss_config, args=args)
    model = model.cuda()

    # loss func
    criterion = get_criterion(args, out_dim, margins)

    # optimizer
    optimizer = optim.AdamW(model.parameters(), lr=args.init_lr)

    # load pretrained
    if args.load_from and args.load_from != 'none':
        checkpoint = torch.load(args.load_from, map_location='cuda:0')
        state_dict = checkpoint['model_state_dict']
        state_dict = {
            k[7:] if k.startswith('module.') else k: state_dict[k]
            for k in state_dict.keys()
        }
        model.load_state_dict(state_dict, strict=True)
        del checkpoint, state_dict
        torch.cuda.empty_cache()
        gc.collect()
        print(f"Loaded weight from {args.load_from}")

    # lr scheduler
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args.n_epochs - 1)
    warmup_epochs = args.warmup_epochs if args.stage == 1 else 1
    print(warmup_epochs)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer, multiplier=10, total_epoch=warmup_epochs,
        after_scheduler=scheduler_cosine)

    # train & valid loop
    best_score = -1
    model_file = os.path.join(
        args.model_dir,
        weight_file(args.kernel_type, args.fold, args.stage,
                    loss_config.loss_type, out_dim))
    for epoch in range(args.start_from_epoch, args.n_epochs + 1):
        print(time.ctime(), f'Epoch: {epoch}/{args.n_epochs}')
        scheduler_warmup.step(epoch - 1)

        train_loss, acc_list = train_epoch(model, train_loader, optimizer,
                                           criterion)
        f1score = val_epoch(model, valid_loader, criterion, df_valid, args)

        content = time.ctime() + ' ' + (
            f'Fold {args.fold}, Epoch {epoch}, '
            f'lr: {optimizer.param_groups[0]["lr"]:.7f}, '
            f'train loss: {np.mean(train_loss):.5f}, '
            f'train acc {np.mean(acc_list):.5f}, f1score: {(f1score):.6f}.')
        print(content)
        with open(os.path.join(args.log_dir, f'{args.kernel_type}.txt'),
                  'a') as appender:
            appender.write(content + '\n')

        if f1score > best_score:
            print('best f1 score ({:.6f} --> {:.6f}). Saving model ...'.format(
                best_score, f1score))
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, model_file)
            best_score = f1score

        if epoch == args.stop_at_epoch:
            print(time.ctime(), 'Training Finished!')
            break

    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_file)
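# A tiny worked example of the adaptive-margin formula above (assumed counts,
# purely illustrative): classes with fewer samples get larger margins, scaled
# into [0.05, 0.5].
counts = np.array([2, 8, 50])          # hypothetical label_group sizes
tmp = np.sqrt(1 / np.sqrt(counts))     # = counts ** -0.25; rarer -> larger
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05
# counts=2 -> margin 0.50, counts=50 -> margin 0.05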
    pretrained_dict = torch.load(model_path)
    model_dict = model.state_dict()
    # only load parameters in dynamics_predictor
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items()
        if 'dynamics_predictor' in k and k in model_dict}
    model.load_state_dict(pretrained_dict, strict=False)
else:
    raise AssertionError(
        "Unsupported stage %s, use the other evaluation scripts" % args.stage)

model.eval()

if use_gpu:
    model = model.cuda()

infos = np.arange(10)

for idx_episode in range(len(infos)):
    print("Rollout %d / %d" % (idx_episode, len(infos)))

    B = 1
    n_particle, n_shape = 0, 0

    # ground truth
    datas = []
    p_gt = []
    s_gt = []
    for step in range(args.time_step):
def predict(args):
    testset = IC15TestDataset()
    testloader = torch.utils.data.DataLoader(dataset=testset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             drop_last=True)
    if args.backbone == 'res50':
        model = resnet50(pretrained=True, num_classes=6)
    elif args.backbone == 'res18':
        model = Model()
    else:
        raise NotImplementedError

    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.exists(args.resume):
            print('Load from', args.resume)
            checkpoint = torch.load(args.resume)
            # We cannot call model.load_state_dict(checkpoint['state_dict'])
            # directly: the model was trained with multiple GPUs, so every
            # parameter name carries a 'module.' prefix that must be stripped.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
        else:
            print('No such checkpoint file at', args.resume)

    model.eval()
    for idx, (img, original_img) in tqdm(enumerate(testloader)):
        img = Variable(img.cuda())
        original_img = original_img.numpy().astype('uint8')[0]
        original_img = original_img.copy()
        outputs = model(img)
        bboxes = generate_result_PAN(outputs, original_img, threshold=0.7)
        for i in range(len(bboxes)):
            bboxes[i] = bboxes[i].reshape(4, 2)[:, [1, 0]].reshape(-1)
        for bbox in bboxes:
            cv2.drawContours(original_img, [bbox.reshape(4, 2)], -1,
                             (0, 255, 0), 1)
        image_name = testset.img_paths[idx].split('/')[-1].split('.')[0]
        generate_txt_result_PAN(
            bboxes, image_name,
            'outputs/result_ic15_txt_PAN_baseline600_v4_4_90_7')
        generate_img_result(
            original_img, image_name,
            'outputs/result_ic15_img_PAN_baseline600_v4_4_90_7')

    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'submit_ic15.zip',
                                    'result_txt_ic15_PAN_baseline')
    print(cmd)
def train(**kwargs):
    if 'dataset' not in kwargs:
        opt = getattr(config, 'Toys_and_Games_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    # 2 model
    model = Model(opt, getattr(methods, opt.model))
    if opt.use_gpu:
        model.cuda()
        if len(opt.gpu_ids) > 0:
            print("use multi gpu")
            model = nn.DataParallel(model, device_ids=opt.gpu_ids)
    if opt.load_ckp:
        assert len(opt.ckp_path) > 0
        model.load(opt.ckp_path)

    # 3 data
    train_data = AmazonData(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data,
                                   opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)
    test_data = AmazonData(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('{}: train data: {}; test data: {}'.format(now(), len(train_data),
                                                     len(test_data)))

    # 4 optimizer
    if opt.fine_tune:
        # fine-tune the word embeddings with a smaller learning rate
        word_char = get_word_para()
        all_weights = dict(model.named_parameters())
        word_paras = []
        others = []
        for name in all_weights:
            if name in word_char:
                word_paras.append(all_weights[name])
            else:
                others.append(all_weights[name])
        optimizer = optim.Adam([{'params': others},
                                {'params': word_paras, 'lr': opt.lr * 0.3}],
                               lr=opt.lr, weight_decay=opt.weight_decay)
    else:
        optimizer = optim.Adam(model.parameters(), lr=opt.lr,
                               betas=(0.9, 0.999),
                               weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.8)

    # training
    print("start training....")
    min_loss = 1e+20
    best_res = 1e+10
    mse_func = nn.MSELoss()
    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        model.train()
        scheduler.step(epoch)
        print("{} Epoch {}: start".format(now(), epoch))
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            if opt.use_gpu:
                scores = torch.FloatTensor(scores).cuda()
            else:
                scores = torch.FloatTensor(scores)
            train_datas = unpack_input(opt, train_datas)

            optimizer.zero_grad()
            output = model(train_datas)
            loss = mse_func(output, scores)
            total_loss += loss.item() * len(scores)
            loss.backward()
            optimizer.step()

            if idx % opt.print_step == 0 and idx > 0:
                print("\t{}, {} step finished;".format(now(), idx))
                predict_loss, test_mse = predict(model, test_data_loader, opt,
                                                 use_gpu=opt.use_gpu)
                if predict_loss < min_loss:
                    # model.save(name=opt.dataset, opt=opt.print_opt)
                    min_loss = predict_loss
                    print("\tmodel save")
                if test_mse < best_res:
                    best_res = test_mse

        print("{};epoch:{};total_loss:{}".format(now(), epoch, total_loss))
        mse = total_loss * 1.0 / len(train_data)
        print("{};train result: mse: {}; rmse: {}".format(now(), mse,
                                                          math.sqrt(mse)))

        predict_loss, test_mse = predict(model, test_data_loader, opt,
                                         use_gpu=opt.use_gpu)
        if predict_loss < min_loss:
            # model.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = predict_loss
            print("model save")
        if test_mse < best_res:
            best_res = test_mse

    print("----" * 20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res: {best_res}")
    print("----" * 20)
def crossover(fir_edge, sec_edge, adj, changes):
    co_list = []
    fitness_list = []
    co_list.append(fir_edge)
    co_list.append(sec_edge)

    fir_x, fir_y, fir_signal = fir_edge
    sec_x, sec_y, sec_signal = sec_edge

    # cross the endpoints of the two parent edges to build two child edges,
    # flipping the add/delete signal relative to the current adjacency
    signal = adj[fir_x, sec_y]
    third_signal = 0 if signal > 0 else 1
    third_edge = (fir_x, sec_y, third_signal)

    signal = adj[sec_x, fir_y]
    four_signal = 0 if signal > 0 else 1
    four_edge = (sec_x, fir_y, four_signal)

    co_list.append(third_edge)
    co_list.append(four_edge)

    for i in range(len(co_list)):
        x, y, signal = co_list[i]
        new_adj = adj.clone()
        if (x, y) in changes or (y, x) in changes:
            fitness_list.append(sys.maxsize)
            continue
        if signal == 1:
            new_adj[x, y] = 1.0
            new_adj[y, x] = 1.0
        if signal == 0:
            new_adj[x, y] = 0.0
            new_adj[y, x] = 0.0

        adj_selfloops = torch.add(new_adj, torch.eye(_N).cuda())
        inv_degrees = torch.pow(
            torch.sum(adj_selfloops, dim=0, keepdim=True), -0.5)
        adj_norm_tensor_cuda = adj_selfloops * inv_degrees * \
            inv_degrees.transpose(0, 1)

        new_model = Model(_F, args.tar_hidden, _K)
        if args.cuda:
            new_model.cuda()
        new_optimizer = optim.Adam(new_model.parameters(), lr=args.tar_lr,
                                   weight_decay=args.tar_weight_decay)
        new_model.model_train(new_optimizer, args.tar_epochs, _X_cuda,
                              adj_norm_tensor_cuda, _z_cuda, idx_train_cuda,
                              idx_val_cuda, use_relu=False,
                              drop_rate=args.drop_rate)
        new_model.model_test(_X_cuda, adj_norm_tensor_cuda,
                             pre_all_labels_cuda, extra_idx_cuda,
                             use_relu=False)
        loss_test = -new_model.loss_test
        fitness_list.append(loss_test)

    # return the candidate edge with the best (lowest) fitness
    fitness_idx = sorted(range(len(fitness_list)),
                         key=lambda k: fitness_list[k])
    index = fitness_idx[0]
    return co_list[index]
def get_greedy_list(ori_adj_cuda, Greedy_edges, change_edges):
    new_adj_tensor_cuda = ori_adj_cuda.clone()
    adj_selfloops = torch.add(new_adj_tensor_cuda, torch.eye(_N).cuda())
    inv_degrees = torch.pow(torch.sum(adj_selfloops, dim=0, keepdim=True),
                            -0.5)
    new_adj_norm_tensor_cuda = adj_selfloops * inv_degrees * \
        inv_degrees.transpose(0, 1)

    new_surrogate_model = Model(_F, args.tar_hidden, _K)
    if args.cuda:
        new_surrogate_model.cuda()
    new_surrogate_optimizer = optim.Adam(new_surrogate_model.parameters(),
                                         lr=args.tar_lr,
                                         weight_decay=args.tar_weight_decay)
    new_surrogate_model.model_train(new_surrogate_optimizer, args.tar_epochs,
                                    _X_cuda, new_adj_norm_tensor_cuda,
                                    _z_cuda, idx_train_cuda, idx_val_cuda,
                                    use_relu=False, drop_rate=args.drop_rate)

    # take the gradient of the (negated) training loss w.r.t. the normalized
    # adjacency to rank candidate edge flips
    new_surrogate_model.zero_grad()
    new_adj_norm_tensor_cuda.requires_grad = True
    outputs = new_surrogate_model(_X_cuda, new_adj_norm_tensor_cuda, False,
                                  drop_rate=args.drop_rate)
    loss = F.nll_loss(outputs[idx_train_cuda], _z_cuda[idx_train_cuda])
    loss = -loss
    loss.backward()

    grad = -(new_adj_norm_tensor_cuda.grad.data.cpu().numpy().flatten())
    grad_abs = -(np.abs(grad))
    idxes = np.argsort(grad_abs)

    find = 0
    acc = None
    for p in idxes:
        if len(Greedy_edges) < args.greedy_edges:
            x = p // _N
            y = p % _N
            if (x, y) in change_edges or (y, x) in change_edges:
                continue
            if grad[p] > 0:
                # add edge
                signal = 1
                if x == y or x in onehops_dict[y] or y in onehops_dict[x]:
                    continue
                find, acc = method_add(x, y, new_adj_tensor_cuda,
                                       new_surrogate_model)
            else:
                # delete edge
                signal = 0
                if x == y or x not in onehops_dict[y] or y not in onehops_dict[x]:
                    continue
                find, acc = method_del(x, y, new_adj_tensor_cuda,
                                       new_surrogate_model)
            if find == 1:
                edge_oper = (x, y, signal)
                acc = acc.item()
                Greedy_edges[edge_oper] = acc
                print('Greedy edge number', len(Greedy_edges))
        else:
            break

    Greedy_list = sorted(Greedy_edges.items(), key=lambda x: x[1])
    return Greedy_list
adj_selfloops = torch.add(ori_adj_tensor_cuda, torch.eye(_N).cuda())
target_inv_degrees = torch.pow(
    torch.sum(adj_selfloops, dim=0, keepdim=True), -0.5)
target_adj_norm_tensor_cuda = adj_selfloops * target_inv_degrees * \
    target_inv_degrees.transpose(0, 1)

_X_cuda, _z_cuda, idx_train_cuda, idx_val_cuda, idx_test_cuda = \
    convert_to_Tensor([_X_obs, _Z_obs, split_train, split_val, split_test])
all_idx_cuda = torch.cat((idx_train_cuda, idx_val_cuda, idx_test_cuda))
extra_idx_cuda = torch.cat((idx_val_cuda, idx_test_cuda))

surrogate_model = Model(_F, args.tar_hidden, _K)
if args.cuda:
    surrogate_model.cuda()
surrogate_optimizer = optim.Adam(surrogate_model.parameters(),
                                 lr=args.tar_lr,
                                 weight_decay=args.tar_weight_decay)
surrogate_model.model_train(surrogate_optimizer, args.tar_epochs, _X_cuda,
                            target_adj_norm_tensor_cuda, _z_cuda,
                            idx_train_cuda, idx_val_cuda, use_relu=False,
                            drop_rate=args.drop_rate)
target_model = Model(_F, args.tar_hidden, _K)
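# The snippets above all build the same symmetrically normalized adjacency
# used by GCNs: A_hat = D^{-1/2} (A + I) D^{-1/2}. A minimal stand-alone
# sketch of that computation (illustrative only, not part of this repo):
def normalize_adj(adj):
    a_hat = adj + torch.eye(adj.size(0), device=adj.device)  # add self-loops
    inv_sqrt_deg = torch.pow(a_hat.sum(dim=0, keepdim=True), -0.5)
    # scale rows and columns by D^{-1/2}
    return a_hat * inv_sqrt_deg * inv_sqrt_deg.transpose(0, 1)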
def main():
    model_name = '3dgnn_enet'
    current_path = os.getcwd()
    logger = logging.getLogger(model_name)
    log_path = current_path + '/artifacts/' + str(
        datetime.datetime.now().strftime('%Y-%m-%d-%H')).replace(' ', '/') + '/'
    print('log path is:', log_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
        os.makedirs(log_path + 'save/')
    hdlr = logging.FileHandler(log_path + model_name + '.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)

    logger.info("Loading data...")
    print("Loading data...")
    label_to_idx = {'<UNK>': 0, 'beam': 1, 'board': 2, 'bookcase': 3,
                    'ceiling': 4, 'chair': 5, 'clutter': 6, 'column': 7,
                    'door': 8, 'floor': 9, 'sofa': 10, 'table': 11,
                    'wall': 12, 'window': 13}
    idx_to_label = {0: '<UNK>', 1: 'beam', 2: 'board', 3: 'bookcase',
                    4: 'ceiling', 5: 'chair', 6: 'clutter', 7: 'column',
                    8: 'door', 9: 'floor', 10: 'sofa', 11: 'table',
                    12: 'wall', 13: 'window'}

    '''Data loader parameters'''
    # Batch size
    batch_size_tr = 4
    batch_size_va = 4
    # Multiple threads loading data
    workers_tr = 4
    workers_va = 4
    # Data augmentation
    flip_prob = 0.5
    crop_size = 0

    dataset_tr = nyudv2.Dataset(flip_prob=flip_prob, crop_type='Random',
                                crop_size=crop_size)
    dataloader_tr = DataLoader(dataset_tr, batch_size=batch_size_tr,
                               shuffle=True, num_workers=workers_tr,
                               drop_last=False, pin_memory=True)
    dataset_va = nyudv2.Dataset(flip_prob=0.0, crop_type='Center',
                                crop_size=crop_size)
    dataloader_va = DataLoader(dataset_va, batch_size=batch_size_va,
                               shuffle=False, num_workers=workers_va,
                               drop_last=False, pin_memory=True)
    cv2.setNumThreads(workers_tr)

    class_weights = [0.0] + [1.0 for i in range(13)]
    nclasses = len(class_weights)
    num_epochs = 50

    '''GNN parameters'''
    use_gnn = True
    gnn_iterations = 3
    gnn_k = 64
    mlp_num_layers = 1

    '''Model parameters'''
    use_bootstrap_loss = False
    bootstrap_rate = 0.25
    use_gpu = True

    logger.info("Preparing model...")
    print("Preparing model...")
    model = Model(nclasses, mlp_num_layers, use_gpu)
    loss = nn.NLLLoss(reduce=not use_bootstrap_loss,
                      weight=torch.FloatTensor(class_weights))
    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    '''Optimizer parameters'''
    base_initial_lr = 5e-4
    gnn_initial_lr = 1e-3
    betas = [0.9, 0.999]
    eps = 1e-08
    weight_decay = 1e-4
    lr_schedule_type = 'exp'
    lr_decay = 0.9
    lr_patience = 10

    optimizer = torch.optim.Adam(
        [{'params': model.decoder.parameters()},
         {'params': model.gnn.parameters(), 'lr': gnn_initial_lr}],
        lr=base_initial_lr, betas=betas, eps=eps, weight_decay=weight_decay)

    if lr_schedule_type == 'exp':
        lambda1 = lambda epoch: pow((1 - ((epoch - 1) / num_epochs)), lr_decay)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      lr_lambda=lambda1)
    elif lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=lr_decay, patience=lr_patience)
    else:
        print('bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()
        with torch.no_grad():
            loss_sum = 0.0
            confusion_matrix = torch.cuda.FloatTensor(np.zeros(14 ** 2))
            start_time = time.time()
            for batch_idx, rgbd_label_xy in enumerate(dataloader):
                sys.stdout.write('\rEvaluating test set... {}/{}'.format(
                    batch_idx + 1, len(dataloader)))
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()
                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()

                output = model(input, gnn_iterations=gnn_iterations, k=gnn_k,
                               xy=xy, use_gnn=use_gnn)

                if use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()),
                                                  target)
                    topk, indices = torch.topk(
                        loss_per_pixel.view(output.size()[0], -1),
                        int((crop_size ** 2) * bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                pairs = target.view(-1) * 14 + pred_arg_max.view(-1)
                for i in range(14 ** 2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(
                time.time() - start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape((14, 14))
            class_iou = np.zeros(14)
            # we ignore void values
            confusion_matrix[0, :] = np.zeros(14)
            confusion_matrix[:, 0] = np.zeros(14)
            for i in range(1, 14):
                class_iou[i] = confusion_matrix[i, i] / (
                    np.sum(confusion_matrix[i, :]) +
                    np.sum(confusion_matrix[:, i]) -
                    confusion_matrix[i, i])

        return loss_sum.item(), class_iou, confusion_matrix

    '''Training parameters'''
    model_to_load = None
    logger.info("num_epochs: %d", num_epochs)
    print("Number of epochs: %d" % num_epochs)
    interval_to_show = 100
    train_losses = []
    eval_losses = []

    if model_to_load:
        logger.info("Loading old model...")
        print("Loading old model...")
        model.load_state_dict(torch.load(model_to_load))
    else:
        logger.info("Starting training from scratch...")
        print("Starting training from scratch...")

    '''Training'''
    for epoch in range(1, num_epochs + 1):
        batch_loss_avg = 0
        if lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in enumerate(dataloader_tr):
            sys.stdout.write('\rTraining data set... {}/{}'.format(
                batch_idx + 1, len(dataloader_tr)))
            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            input = input.type(torch.FloatTensor)
            if use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()
            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            output = model(input, gnn_iterations=gnn_iterations, k=gnn_k,
                           xy=xy, use_gnn=use_gnn)

            if use_bootstrap_loss:
                loss_per_pixel = loss.forward(log_softmax(output.float()),
                                              target)
                topk, indices = torch.topk(
                    loss_per_pixel.view(output.size()[0], -1),
                    int((crop_size ** 2) * bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()
            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx,
                            batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(
                    epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)
        torch.save(model.state_dict(),
                   log_path + '/save/' + 'checkpoint_' + str(epoch) + '.pth')

        '''Evaluation'''
        eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        eval_losses.append(eval_loss)

        if lr_schedule_type == 'plateau':
            scheduler.step(eval_loss)
        print('Learning ...')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(
            epoch, get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(
            epoch, get_current_learning_rates()[1]))
        logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        print('Epoch{} Class IoU:'.format(epoch))
        for cl in range(14):
            logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
            print('{}:{}'.format(idx_to_label[cl], class_iou[cl]))
        logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        print("Mean IoU: %.2f" % np.mean(class_iou[1:]))
        logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        logger.info(confusion_matrix)

    logger.info("Finished training!")
    logger.info("Saving model...")
    print('Saving final model...')
    torch.save(model.state_dict(), log_path + '/save/3dgnn_enet_finish.pth')
    eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
    logger.info("Eval loss: %s", eval_loss)
    logger.info("Class IoU:")
    for cl in range(14):
        logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
    logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
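# Worked example of the per-class IoU computed from the confusion matrix in
# eval_set above (toy numbers, illustrative only): for class i,
# IoU_i = cm[i, i] / (row_sum_i + col_sum_i - cm[i, i]).
cm = np.array([[0, 0, 0],
               [0, 8, 2],   # class 1: 8 correct, 2 predicted as class 2
               [0, 1, 9]])  # class 2: 9 correct, 1 predicted as class 1
iou_1 = cm[1, 1] / (cm[1, :].sum() + cm[:, 1].sum() - cm[1, 1])  # 8/11 ~ 0.73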
def train_nn(dataset_path, hha_dir, save_models_dir, num_epochs=50,
             batch_size=4, from_last_check_point=False,
             check_point_prefix='checkpoint', start_epoch=0,
             pre_train_model='', notebook=False):
    progress = tqdm_notebook if notebook else tqdm

    logger.info('Loading data...')
    dataset_tr = nyudv2.Dataset(dataset_path, hha_dir,
                                flip_prob=config.flip_prob,
                                crop_type='Random',
                                crop_size=config.crop_size)
    dataloader_tr = DataLoader(dataset_tr, batch_size=batch_size,
                               shuffle=True, num_workers=config.workers_tr,
                               drop_last=False, pin_memory=True)
    dataset_va = nyudv2.Dataset(dataset_path, hha_dir, flip_prob=0.0,
                                crop_type='Center',
                                crop_size=config.crop_size)
    dataloader_va = DataLoader(dataset_va, batch_size=batch_size,
                               shuffle=False, num_workers=config.workers_va,
                               drop_last=False, pin_memory=True)

    if from_last_check_point:
        start_epoch, pre_train_model = find_last_check_point(
            save_models_dir, check_point_prefix)

    cv2.setNumThreads(config.workers_tr)

    logger.info('Preparing model...')
    model = Model(config.nclasses, config.mlp_num_layers, config.use_gpu)
    loss = nn.NLLLoss(reduce=not config.use_bootstrap_loss,
                      weight=torch.FloatTensor(config.class_weights))
    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if config.use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    optimizer = torch.optim.Adam(
        [{'params': model.decoder.parameters()},
         {'params': model.gnn.parameters(), 'lr': config.gnn_initial_lr}],
        lr=config.base_initial_lr, betas=config.betas, eps=config.eps,
        weight_decay=config.weight_decay)

    if config.lr_schedule_type == 'exp':
        def lambda_1(lambda_epoch):
            return pow((1 - ((lambda_epoch - 1) / num_epochs)),
                       config.lr_decay)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      lr_lambda=lambda_1)
    elif config.lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=config.lr_decay, patience=config.lr_patience)
    else:
        logger.error('Bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()
        with torch.no_grad():
            loss_sum = 0.0
            init_tensor_value = np.zeros(14 ** 2)
            if config.use_gpu:
                confusion_matrix = torch.cuda.FloatTensor(init_tensor_value)
            else:
                confusion_matrix = torch.FloatTensor(init_tensor_value)
            start_time = time.time()
            for batch_idx, rgbd_label_xy in progress(enumerate(dataloader),
                                                     total=len(dataloader),
                                                     desc='Eval set'):
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()
                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if config.use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()

                output = model(input, gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k, xy=xy, use_gnn=config.use_gnn)

                if config.use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()),
                                                  target)
                    topk, indices = torch.topk(
                        loss_per_pixel.view(output.size()[0], -1),
                        int((config.crop_size ** 2) * config.bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, config.nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                pairs = target.view(-1) * 14 + pred_arg_max.view(-1)
                for i in range(14 ** 2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(
                time.time() - start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape((14, 14))
            class_iou = np.zeros(14)
            confusion_matrix[0, :] = np.zeros(14)
            confusion_matrix[:, 0] = np.zeros(14)
            for i in range(1, 14):
                class_iou[i] = confusion_matrix[i, i] / (
                    np.sum(confusion_matrix[i, :]) +
                    np.sum(confusion_matrix[:, i]) -
                    confusion_matrix[i, i])

        return loss_sum.item(), class_iou, confusion_matrix

    # Training parameters
    logger.info(f'Num_epochs: {num_epochs}')
    interval_to_show = 100
    train_losses = []
    eval_losses = []

    if pre_train_model:
        logger.info(f'Loading pre-train model {pre_train_model}... ')
        model.load_state_dict(torch.load(pre_train_model))
    else:
        logger.info('Starting training from scratch...')

    # Training
    for epoch in progress(range(start_epoch, num_epochs + 1), desc='Training'):
        batch_loss_avg = 0
        if config.lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in progress(enumerate(dataloader_tr),
                                                 total=len(dataloader_tr),
                                                 desc=f'Epoch {epoch}'):
            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            input = input.type(torch.FloatTensor)
            if config.use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()
            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            output = model(input, gnn_iterations=config.gnn_iterations,
                           k=config.gnn_k, xy=xy, use_gnn=config.use_gnn)

            if config.use_bootstrap_loss:
                loss_per_pixel = loss.forward(log_softmax(output.float()),
                                              target)
                topk, indices = torch.topk(
                    loss_per_pixel.view(output.size()[0], -1),
                    int((config.crop_size ** 2) * config.bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()
            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx,
                            batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(
                    epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)
        torch.save(
            model.state_dict(),
            os.path.join(
                save_models_dir,
                f'{check_point_prefix}{CHECK_POINT_SEP}{epoch!s}{MODELS_EXT}'))

        # Evaluation
        eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        eval_losses.append(eval_loss)

        if config.lr_schedule_type == 'plateau':
            scheduler.step(eval_loss)
        print('Learning ...')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(
            epoch, get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(
            epoch, get_current_learning_rates()[1]))
        logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        print('Epoch{} Class IoU:'.format(epoch))
        for cl in range(14):
            logger.info("%+10s: %-10s" % (IDX_LABEL[cl], class_iou[cl]))
            print('{}:{}'.format(IDX_LABEL[cl], class_iou[cl]))
        logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        print("Mean IoU: %.2f" % np.mean(class_iou[1:]))
        logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        logger.info(confusion_matrix)

    logger.info('Finished training!')
    logger.info('Saving trained model...')
    torch.save(model.state_dict(),
               os.path.join(save_models_dir, f'finish{MODELS_EXT}'))
    eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
    logger.info('Eval loss: %s', eval_loss)
    logger.info('Class IoU:')
    for cl in range(14):
        logger.info("%+10s: %-10s" % (IDX_LABEL[cl], class_iou[cl]))
    logger.info(f'Mean IoU: {np.mean(class_iou[1:])}')
              nfeat=args.embed_size,
              nhid=args.hidden,
              gat_hidden_dim=args.gat_hidden_dim,
              joint_dim=args.joint_dim,
              features_index=features_index,
              tweet_word_adj=tweet_word_adj,
              user_tweet_adj=user_tweet_adj,
              nclass=labels.max().item() + 1,
              dropout=args.dropout,
              alpha=args.alpha)
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)

if args.cuda:
    model.cuda()
    # features = features.cuda()
    # adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    train_idx = train_idx.cuda()
    dev_idx = dev_idx.cuda()
    test_idx = test_idx.cuda()


def train(epoch, best_acc, patience):
    model.train()
    total_iters = len(idx_train) // args.batch_size + 1
def main(args):
    G = nx.read_edgelist(args.classifydir + '_edgelist.txt', nodetype=int)
    model = Model(nx.number_of_nodes(G), args.num_parts)
    adj = Variable(torch.FloatTensor(nx.adjacency_matrix(G).toarray()),
                   requires_grad=False)
    print("NUMBER OF NODES:", len(G.nodes()))
    print("NUMBER OF EDGES:", len(G.edges()))
    if torch.cuda.is_available():
        model = model.cuda()
        adj = adj.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    micros = []
    macros = []
    accuracies = []
    losses = []
    for epoch in tqdm(range(args.num_epochs)):
        model.zero_grad()
        super_adj = model(adj, temp=args.temp, hard=args.hard, beta=args.beta)
        loss = model.loss(super_adj, balance_node=args.balance_node, lam=args.lam)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        if epoch % 50 == 0:
            vectors = embed_arr_2_dict(model.params.cpu().detach().numpy(), G)
            accs = classify(vectors, args)
            print("micro:", accs['micro'], "macro:", accs['macro'],
                  "loss", loss.item(), "accuracy", accs["acc"])
            micros.append(accs['micro'])
            macros.append(accs['macro'])
            accuracies.append(accs['acc'])
            losses.append(loss.item())
    print("Final Micro:", micros[-1], "Final Macro:", macros[-1])

    title = ("Dataset: " + str(args.classifydir.split("/")[-1]) +
             ", training percentage: " + str(args.train_percent) +
             ", learning rate: " + str(args.lr) +
             ", epochs: " + str(args.num_epochs))
    plt.plot(micros, label='micros')
    plt.plot(macros, label='macros')
    plt.plot(accuracies, label='accuracy')
    plt.legend()
    plt.title(title)
    plt.show()
    plt.plot(losses, label='loss')
    plt.title(title)
    plt.show()
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logger = logging.getLogger('3dgnn')
    log_path = './experiment/' + str(
        datetime.datetime.now().strftime('%Y-%m-%d-%H')).replace(' ', '/') + '/'
    print('log path is:', log_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
        os.makedirs(log_path + 'save/')
    hdlr = logging.FileHandler(log_path + 'log.txt')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)

    logger.info("Loading data...")
    print("Loading data...")
    # Label names come from the dataset itself, e.g.
    # {0: '<UNK>', 1: 'beam', 2: 'board', 3: 'bookcase', 4: 'ceiling',
    #  5: 'chair', 6: 'clutter', 7: 'column', 8: 'door', 9: 'floor',
    #  10: 'sofa', 11: 'table', 12: 'wall', 13: 'window'}
    if args.is_2_headed:
        dataset_tr = nyud2headed.Dataset(flip_prob=config.flip_prob,
                                         crop_type='Random',
                                         crop_size=config.crop_size)
    else:
        dataset_tr = nyudv2.Dataset(flip_prob=config.flip_prob,
                                    crop_type='Random',
                                    crop_size=config.crop_size)
    idx_to_label = dataset_tr.label_names
    if args.is_2_headed:
        idx_to_label2 = dataset_tr.label2_names
    dataloader_tr = DataLoader(dataset_tr,
                               batch_size=args.batchsize,
                               shuffle=True,
                               num_workers=config.workers_tr,
                               drop_last=False,
                               pin_memory=True)
    if args.is_2_headed:
        dataset_va = nyud2headed.Dataset(flip_prob=0.0,
                                         crop_type='Center',
                                         crop_size=config.crop_size)
    else:
        dataset_va = nyudv2.Dataset(flip_prob=0.0,
                                    crop_type='Center',
                                    crop_size=config.crop_size)
    dataloader_va = DataLoader(dataset_va,
                               batch_size=args.batchsize,
                               shuffle=False,
                               num_workers=config.workers_va,
                               drop_last=False,
                               pin_memory=True)
    cv2.setNumThreads(config.workers_tr)

    logger.info("Preparing model...")
    print("Preparing model...")
    # Class 0 ('<UNK>') gets zero weight so it is ignored by the loss.
    class_weights = [0.0] + [1.0 for i in range(1, len(idx_to_label))]
    nclasses = len(class_weights)
    if args.is_2_headed:
        nclasses1 = nclasses
        class2_weights = [0.0] + [1.0 for i in range(1, len(idx_to_label2))]
        nclasses2 = len(class2_weights)
        model = Model2Headed(nclasses1, nclasses2, config.mlp_num_layers,
                             config.use_gpu)
        # reduction='none' keeps per-pixel losses for bootstrapping
        # (replaces the deprecated reduce= flag).
        loss2 = nn.NLLLoss(
            reduction='none' if config.use_bootstrap_loss else 'mean',
            weight=torch.FloatTensor(class2_weights))
    else:
        model = Model(nclasses, config.mlp_num_layers, config.use_gpu)
    # The first-head loss is needed in both branches: the two-headed
    # training path below sums `loss` and `loss2`.
    loss = nn.NLLLoss(
        reduction='none' if config.use_bootstrap_loss else 'mean',
        weight=torch.FloatTensor(class_weights))
    softmax = nn.Softmax(dim=1)
    log_softmax = nn.LogSoftmax(dim=1)

    if config.use_gpu:
        model = model.cuda()
        loss = loss.cuda()
        if args.is_2_headed:
            loss2 = loss2.cuda()
        softmax = softmax.cuda()
        log_softmax = log_softmax.cuda()

    # Two parameter groups: the GNN gets its own initial learning rate.
    optimizer = torch.optim.Adam([{
        'params': model.decoder.parameters()
    }, {
        'params': model.gnn.parameters(),
        'lr': config.gnn_initial_lr
    }],
                                 lr=config.base_initial_lr,
                                 betas=config.betas,
                                 eps=config.eps,
                                 weight_decay=config.weight_decay)

    if config.lr_schedule_type == 'exp':
        lambda1 = lambda epoch: pow(
            (1 - ((epoch - 1) / args.num_epochs)), config.lr_decay)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                      lr_lambda=lambda1)
    elif config.lr_schedule_type == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=config.lr_decay, patience=config.lr_patience)
    else:
        print('bad scheduler')
        exit(1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    logger.info("Number of trainable parameters: %d", params)

    def get_current_learning_rates():
        learning_rates = []
        for param_group in optimizer.param_groups:
            learning_rates.append(param_group['lr'])
        return learning_rates

    def eval_set(dataloader):
        model.eval()
        with torch.no_grad():
            loss_sum = 0.0
            if config.use_gpu:
                confusion_matrix = torch.cuda.FloatTensor(
                    np.zeros(len(idx_to_label)**2))
            else:
                confusion_matrix = torch.FloatTensor(
                    np.zeros(len(idx_to_label)**2))
            start_time = time.time()
            for batch_idx, rgbd_label_xy in tqdm(enumerate(dataloader),
                                                 total=len(dataloader),
                                                 smoothing=0.9):
                x = rgbd_label_xy[0]
                xy = rgbd_label_xy[2]
                target = rgbd_label_xy[1].long()
                x = x.float()
                xy = xy.float()
                input = x.permute(0, 3, 1, 2).contiguous()
                xy = xy.permute(0, 3, 1, 2).contiguous()
                if config.use_gpu:
                    input = input.cuda()
                    xy = xy.cuda()
                    target = target.cuda()
                # Note: eval_set only evaluates the first head; a two-headed
                # model would return (output1, output2) here.
                output = model(input,
                               gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k,
                               xy=xy,
                               use_gnn=config.use_gnn)

                if config.use_bootstrap_loss:
                    loss_per_pixel = loss.forward(log_softmax(output.float()),
                                                  target)
                    topk, indices = torch.topk(
                        loss_per_pixel.view(output.size()[0], -1),
                        int((config.crop_size**2) * config.bootstrap_rate))
                    loss_ = torch.mean(topk)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)
                loss_sum += loss_

                pred = output.permute(0, 2, 3, 1).contiguous()
                pred = pred.view(-1, nclasses)
                pred = softmax(pred)
                pred_max_val, pred_arg_max = pred.max(1)

                # Flatten (target, prediction) pairs into a 1-D histogram;
                # it is reshaped into the confusion matrix below.
                pairs = target.view(-1) * len(
                    idx_to_label) + pred_arg_max.view(-1)
                for i in range(len(idx_to_label)**2):
                    cumu = pairs.eq(i).float().sum()
                    confusion_matrix[i] += cumu.item()

            sys.stdout.write(" - Eval time: {:.2f}s \n".format(time.time() -
                                                               start_time))
            loss_sum /= len(dataloader)

            confusion_matrix = confusion_matrix.cpu().numpy().reshape(
                (len(idx_to_label), len(idx_to_label)))
            class_iou = np.zeros(len(idx_to_label))
            # Zero out the '<UNK>' row and column before computing IoU.
            confusion_matrix[0, :] = np.zeros(len(idx_to_label))
            confusion_matrix[:, 0] = np.zeros(len(idx_to_label))
            for i in range(1, len(idx_to_label)):
                tot = np.sum(confusion_matrix[i, :]) + np.sum(
                    confusion_matrix[:, i]) - confusion_matrix[i, i]
                if tot == 0:
                    class_iou[i] = 0
                else:
                    class_iou[i] = confusion_matrix[i, i] / tot

        return loss_sum.item(), class_iou, confusion_matrix

    '''Training parameters'''
    model_to_load = args.pretrain
    logger.info("num_epochs: %d", args.num_epochs)
    print("Number of epochs: %d" % args.num_epochs)
    interval_to_show = 100

    train_losses = []
    eval_losses = []

    if model_to_load:
        logger.info("Loading old model...")
        print("Loading old model...")
        model.load_state_dict(torch.load(model_to_load))
    else:
        logger.info("Starting training from scratch...")
        print("Starting training from scratch...")

    '''Training'''
    for epoch in range(1, args.num_epochs + 1):
        print("epoch", epoch)
        batch_loss_avg = 0
        if config.lr_schedule_type == 'exp':
            scheduler.step(epoch)
        for batch_idx, rgbd_label_xy in tqdm(enumerate(dataloader_tr),
                                             total=len(dataloader_tr),
                                             smoothing=0.9):
            x = rgbd_label_xy[0]
            target = rgbd_label_xy[1].long()
            if args.is_2_headed:
                target2 = rgbd_label_xy[3].long()
            xy = rgbd_label_xy[2]
            x = x.float()
            xy = xy.float()

            input = x.permute(0, 3, 1, 2).contiguous()
            input = input.type(torch.FloatTensor)
            if config.use_gpu:
                input = input.cuda()
                xy = xy.cuda()
                target = target.cuda()
                if args.is_2_headed:
                    target2 = target2.cuda()
            xy = xy.permute(0, 3, 1, 2).contiguous()

            optimizer.zero_grad()
            model.train()

            if args.is_2_headed:
                output1, output2 = model(input,
                                         gnn_iterations=config.gnn_iterations,
                                         k=config.gnn_k,
                                         xy=xy,
                                         use_gnn=config.use_gnn)
            else:
                output = model(input,
                               gnn_iterations=config.gnn_iterations,
                               k=config.gnn_k,
                               xy=xy,
                               use_gnn=config.use_gnn)

            if config.use_bootstrap_loss:
                # Bootstrapped loss: keep only the hardest pixels (top-k per
                # image) and average over them. Note this path assumes the
                # single-headed output.
                loss_per_pixel = loss.forward(log_softmax(output.float()),
                                              target)
                topk, indices = torch.topk(
                    loss_per_pixel.view(output.size()[0], -1),
                    int((config.crop_size**2) * config.bootstrap_rate))
                loss_ = torch.mean(topk)
            else:
                if args.is_2_headed:
                    loss_ = loss.forward(log_softmax(
                        output1.float()), target) + loss2.forward(
                            log_softmax(output2.float()), target2)
                else:
                    loss_ = loss.forward(log_softmax(output.float()), target)

            loss_.backward()
            optimizer.step()

            batch_loss_avg += loss_.item()
            if batch_idx % interval_to_show == 0 and batch_idx > 0:
                batch_loss_avg /= interval_to_show
                train_losses.append(batch_loss_avg)
                logger.info("E%dB%d Batch loss average: %s", epoch, batch_idx,
                            batch_loss_avg)
                print('\rEpoch:{}, Batch:{}, loss average:{}'.format(
                    epoch, batch_idx, batch_loss_avg))
                batch_loss_avg = 0

        batch_idx = len(dataloader_tr)
        logger.info("E%dB%d Saving model...", epoch, batch_idx)
        torch.save(model.state_dict(),
                   log_path + '/save/' + 'checkpoint_' + str(epoch) + '.pth')

        '''Evaluation'''
        # eval_loss, class_iou, confusion_matrix = eval_set(dataloader_va)
        # eval_losses.append(eval_loss)
        #
        # if config.lr_schedule_type == 'plateau':
        #     scheduler.step(eval_loss)

        print('Learning ...')
        logger.info("E%dB%d Def learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[0])
        print('Epoch{} Def learning rate: {}'.format(
            epoch, get_current_learning_rates()[0]))
        logger.info("E%dB%d GNN learning rate: %s", epoch, batch_idx,
                    get_current_learning_rates()[1])
        print('Epoch{} GNN learning rate: {}'.format(
            epoch, get_current_learning_rates()[1]))
        # logger.info("E%dB%d Eval loss: %s", epoch, batch_idx, eval_loss)
        # print('Epoch{} Eval loss: {}'.format(epoch, eval_loss))
        # logger.info("E%dB%d Class IoU:", epoch, batch_idx)
        # print('Epoch{} Class IoU:'.format(epoch))
        # for cl in range(len(idx_to_label)):
        #     logger.info("%+10s: %-10s" % (idx_to_label[cl], class_iou[cl]))
        #     print('{}:{}'.format(idx_to_label[cl], class_iou[cl]))
        # logger.info("Mean IoU: %s", np.mean(class_iou[1:]))
        # print("Mean IoU: %.2f" % np.mean(class_iou[1:]))
        # logger.info("E%dB%d Confusion matrix:", epoch, batch_idx)
        # logger.info(confusion_matrix)

    logger.info("Finished training!")
    logger.info("Saving model...")
    print('Saving final model...')
    torch.save(model.state_dict(), log_path + '/save/3dgnn_finish.pth')
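# A minimal sketch of a CLI entry point for the routine above. The flag
# names mirror the attributes main() reads (gpu, is_2_headed, batchsize,
# num_epochs, pretrain); the defaults are illustrative assumptions, not
# values taken from the original project.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='3DGNN training')
    parser.add_argument('--gpu', type=str, default='0',
                        help='value for CUDA_VISIBLE_DEVICES')
    parser.add_argument('--is_2_headed', action='store_true',
                        help='train the two-headed variant of the model')
    parser.add_argument('--batchsize', type=int, default=4)
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--pretrain', type=str, default='',
                        help='optional path to a checkpoint to load')
    main(parser.parse_args())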
def train():
    # For watching in TensorBoard
    tb = SummaryWriter()

    # Load data
    transform = set_transform()
    train_loader = get_loader(train_corpus, batch_size=8, transform=transform)
    valid_loader = get_loader(valid_corpus, batch_size=8, transform=transform)

    # Define the model and print it
    model = Model(vocab)
    print(model)

    # Add a sample image grid and the model graph to TensorBoard
    batch = next(iter(valid_loader))
    grid = torchvision.utils.make_grid(batch[0])
    tb.add_image('images', grid, 0)
    tb.add_graph(model, batch[0])

    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                          momentum=momentum)

    best_valid_loss = float('inf')
    train_loss, valid_loss = [], []
    not_improved = 0
    show_after_iter = 10

    # Check whether CUDA is available
    gpu_available = torch.cuda.is_available()
    if gpu_available:
        model.cuda()

    print("*" * 30 + " Training Start " + "*" * 30)
    for e in range(1, epoch):
        ## Training ##
        model.train()
        for i, (image, classes) in enumerate(train_loader):
            if gpu_available:
                image = image.cuda()
                classes = classes.cuda()
            output = model(image)
            loss = criterion(output, classes)
            # Backprop: clear stale gradients before each step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Move the loss to the CPU for logging
            loss = loss.cpu().detach().numpy()
            train_loss.append(loss)
            if i % show_after_iter == 0:
                # Note: train_loss accumulates across epochs, so this is a
                # running average over the whole run, and "100 - loss" is
                # only a rough proxy, not a true accuracy.
                avg_loss = sum(train_loss) / len(train_loss)
                print(f"Epoch: ({e}/{epoch}) Loss: {loss} "
                      f"Avg Loss: {avg_loss} Accuracy: {100 - loss} "
                      f"Avg Acc: {100 - avg_loss}")
            del image, loss, classes
        avg_train_loss = sum(train_loss) / len(train_loss)
        tb.add_scalar("Training_Loss", avg_train_loss, e)
        tb.add_scalar("Training_Accuracy", 100 - avg_train_loss, e)

        ## Validation ##
        model.eval()
        with torch.no_grad():
            for i, (image, classes) in enumerate(valid_loader):
                if gpu_available:
                    image = image.cuda()
                    classes = classes.cuda()
                output = model(image)
                loss = criterion(output, classes)
                loss = loss.cpu().detach().numpy()
                valid_loss.append(loss)
        # valid_loss also accumulates across epochs (see note above)
        avg_valid_loss = sum(valid_loss) / len(valid_loss)

        # Save the model if the validation loss improved
        if avg_valid_loss < best_valid_loss:
            best_valid_loss = avg_valid_loss
            model_save = save_path + "/best_model.th"
            torch.save(model.state_dict(), model_save)
            not_improved = 0
        else:
            not_improved += 1
            # Early stopping after 6 epochs without improvement
            if not_improved >= 6:
                break

        print(f"\n\t Epoch: {e} Training Loss: {avg_train_loss} "
              f"Training Accuracy: {100 - avg_train_loss}")
        print(f"\t Epoch: {e} Validation Loss: {avg_valid_loss} "
              f"Validation Accuracy: {100 - avg_valid_loss} \n")
        tb.add_scalar("Validation_Loss", avg_valid_loss, e)
        tb.add_scalar("Validation_Accuracy", 100 - avg_valid_loss, e)

    # Save training and validation losses so that plots can be generated later
    save_loss = {"train": train_loss, "valid": valid_loss}
    with open(save_path + "/losses.pickle", "wb") as files:
        pickle.dump(save_loss, files)
    tb.close()
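# The "accuracy" logged above is just 100 minus the loss, which is only a
# rough proxy. Below is a minimal sketch of true top-1 accuracy computed
# from the same tensors; it assumes `output` holds class logits and
# `classes` holds integer labels, as in the loops above. The helper name
# is hypothetical, not part of the original code.
def top1_accuracy(output, classes):
    """Fraction of samples whose argmax logit matches the label."""
    pred = output.argmax(dim=1)
    return (pred == classes).float().mean().item()

# Example use inside the validation loop:
#     acc = top1_accuracy(model(image), classes)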