class PointNetTrainer():
    """Train and evaluate a PointNet classifier on batched point-cloud data.

    The class relies on the module-level names ``PointNet``, ``device``,
    ``torch``, ``tqdm`` and ``plt``.  Every batch yielded by a loader must
    expose ``pos``, ``batch`` and ``y`` attributes and a ``to(device)``
    method.
    """

    def __init__(self):
        self.model = PointNet().to(device)
        # Alternative backbone kept from the original: PFFNet().to(device)
        self.lr = 1e-4
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.epoches = 20

    def train(self, loader):
        """Run one training epoch and return the mean loss per sample.

        Args:
            loader: Iterable of training batches.

        Returns:
            float: Sample-weighted mean loss, or ``0.0`` for an empty loader.
        """
        self.model.train()
        total_loss = 0.0
        tot_num = 0
        for data in tqdm.tqdm(loader):
            batch_len = len(data.y)
            tot_num += batch_len
            data = data.to(device)
            self.optimizer.zero_grad()
            logits = self.model(data.pos, data.batch)
            loss = self.criterion(logits, data.y)
            loss.backward()
            self.optimizer.step()
            # The criterion configured in __init__ averages over the batch,
            # so weight it by the batch size before dividing by the number
            # of samples below.
            total_loss += loss.item() * batch_len
        return total_loss / tot_num if tot_num else 0.0

    @torch.no_grad()
    def test(self, loader, max_batches=51):
        """Return classification accuracy over the first batches of *loader*.

        Args:
            loader: Iterable of evaluation batches.
            max_batches: Maximum number of batches to evaluate (the original
                hard-coded limit was 51).  ``None`` evaluates everything.

        Returns:
            float: Fraction of correct predictions, or ``0.0`` when no
            sample was evaluated.
        """
        self.model.eval()
        total_correct = 0
        tot_num = 0
        for i, data in enumerate(loader):
            if max_batches is not None and i >= max_batches:
                break
            tot_num += len(data.y)
            data = data.to(device)
            logits = self.model(data.pos, data.batch)
            pred = logits.argmax(dim=-1)
            # Accumulate a Python int so the result is a plain float that
            # can be printed and plotted regardless of the device in use.
            total_correct += int((pred == data.y).sum().item())
        return total_correct / tot_num if tot_num else 0.0

    def work(self, train_loader, test_loader):
        """Train for ``self.epoches`` epochs and save the loss/accuracy curves.

        Args:
            train_loader: Iterable of training batches.
            test_loader: Iterable of evaluation batches.
        """
        import os

        plt.figure()
        losses = []
        accs = []
        for epoch in range(self.epoches):
            loss = self.train(train_loader)
            acc = self.test(test_loader)
            print("epoch", epoch, "loss", loss, "acc", acc)
            losses.append(loss)
            accs.append(acc)
        plt.plot(losses)
        plt.plot(accs)
        plt.legend(['loss', 'acc'])
        # savefig does not create missing directories.
        os.makedirs('dump', exist_ok=True)
        plt.savefig('dump/curve.png')
def main(opt):
    """Build the data loaders and network, then alternate train/test epochs.

    Header rows are appended to ``train.csv`` and ``test.csv`` before the
    loop starts, and the total wall-clock time of the loop is appended to
    ``time.csv`` afterwards.

    Args:
        opt: Options object read for ``dataroot``, ``batchSize``, ``d``,
            ``feature_transform``, ``lr`` and ``niter``; it is also passed
            through to ``train`` and ``test``.
    """

    def _append_row(csv_path, row):
        # Append a single row, always terminating it with a bare newline.
        with open(csv_path, 'a') as handle:
            csv.writer(handle, lineterminator='\n').writerow(row)

    train_dataloader = Dataloader(
        Dataset(opt.dataroot, True),
        batch_size=opt.batchSize,
        shuffle=False,
        num_workers=2,
    )
    test_dataloader = Dataloader(
        Dataset(opt.dataroot, False),
        batch_size=opt.batchSize,
        shuffle=False,
        num_workers=2,
    )

    net = PointNet(d=opt.d, feature_transform=opt.feature_transform)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=2)
    optimizer = optim.Adam(net.parameters(), lr=opt.lr)

    _append_row(
        'train.csv',
        ["train_loss", "train_gain", "baseline_loss", "baseline_gain"])
    _append_row(
        'test.csv',
        ["test_loss", "test_gain", "baseline_loss", "baseline_gain"])

    began = time.time()
    for epoch in range(opt.niter):
        train(epoch, train_dataloader, net, criterion, optimizer, opt)
        test(test_dataloader, net, criterion, optimizer, opt)
    elapsed_time = time.time() - began

    _append_row('time.csv', ["学習時間", elapsed_time])
def train(args, io):
    """Train a PointNet or DGCNN classifier on ModelNet40.

    Each epoch trains on the ``'train'`` partition, evaluates on the
    ``'test'`` partition, logs loss / accuracy / balanced accuracy through
    ``io.cprint`` and saves the model ``state_dict`` whenever the test
    accuracy matches or beats the best value seen so far.

    Args:
        args: Namespace read for ``num_points``, ``batch_size``,
            ``test_batch_size``, ``cuda``, ``model``, ``use_sgd``, ``lr``,
            ``momentum``, ``epochs`` and ``exp_name``.
        io: Logger exposing ``cprint(str)``.

    Raises:
        Exception: If ``args.model`` is neither ``'pointnet'`` nor
            ``'dgcnn'``.
    """
    train_loader = DataLoader(
        ModelNet40(partition='train', num_points=args.num_points),
        num_workers=8, batch_size=args.batch_size,
        shuffle=True, drop_last=True)
    test_loader = DataLoader(
        ModelNet40(partition='test', num_points=args.num_points),
        num_workers=8, batch_size=args.test_batch_size,
        shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            # reshape(-1) instead of squeeze(): squeeze() would also remove
            # the batch axis of a single-sample batch.  Assumes one class
            # index per sample, as the accuracy computation below does.
            data, label = data.to(device), label.to(device).reshape(-1)
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).reshape(-1)
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)

        # Step the schedule once per epoch, after the optimizer updates;
        # stepping it first skips the initial learning-rate value.
        scheduler.step()
def train(modelin=args.model, modelout=args.out,device=args.device,opt=args.opt):
    """Train the calibration network and the shape network together.

    ``calib_net`` (``PointNet(n=1)``) predicts a focal-length offset that is
    added to 300; ``sfm_net`` (``PointNet(n=199)``) predicts the coefficients
    that are multiplied with ``data.lm_eigenvec`` and added to the mean shape
    ``data.mu_lm``.  The loop over epochs is unbounded
    (``itertools.count()``), so the function only ends through an exception
    or an external interrupt.

    NOTE(review): the source this was recovered from had lost its
    indentation.  Saving the two state dicts and calling ``test`` are placed
    at the end of every epoch because nothing after the unbounded loop could
    ever run - confirm against the intended behaviour.

    Args:
        modelin: Suffix of the checkpoints ``model/calib_<modelin>`` and
            ``model/sfm_<modelin>`` to start from; ``""`` starts from scratch.
        modelout: Suffix used for the checkpoints written after each epoch.
        device: Device the networks and batches are moved to.
        opt: When falsy, every batch performs one supervised step on the
            focal-length and 3D-shape errors.  When truthy, the visible code
            falls through to a log line that reads names this function never
            assigns on that path (see the note in the loop body).
    """
    # define model, dataloader, 3dmm eigenvectors, optimization method
    calib_net = PointNet(n=1)
    sfm_net = PointNet(n=199)
    if modelin != "":
        calib_path = os.path.join('model','calib_' + modelin)
        sfm_path = os.path.join('model','sfm_' + modelin)
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        # map_location='cpu' lets a checkpoint saved on a GPU be read on a
        # machine without one; the networks are moved to `device` below.
        pretrained1 = torch.load(calib_path, map_location='cpu')
        pretrained2 = torch.load(sfm_path, map_location='cpu')
        calib_dict = calib_net.state_dict()
        sfm_dict = sfm_net.state_dict()

        # Keep only the entries the current networks know about and merge
        # them over the freshly initialised weights.
        calib_dict.update(
            {k: v for k, v in pretrained1.items() if k in calib_dict})
        sfm_dict.update(
            {k: v for k, v in pretrained2.items() if k in sfm_dict})

        # Load the merged dicts (the original loaded the filtered partial
        # dicts, which fails whenever the checkpoint lacks a key).
        calib_net.load_state_dict(calib_dict)
        sfm_net.load_state_dict(sfm_dict)
    calib_net.to(device=device)
    sfm_net.to(device=device)
    opt1 = torch.optim.Adam(calib_net.parameters(),lr=1e-3)
    opt2 = torch.optim.Adam(sfm_net.parameters(),lr=1e-3)

    # dataloader
    data = dataloader.Data()
    loader = data.batchloader
    batch_size = data.batchsize

    # mean shape and eigenvectors for 3dmm, repeated once per batch element
    mu_lm = torch.from_numpy(data.mu_lm).float()
    mu_lm[:,2] = mu_lm[:,2] * -1
    mu_lm = torch.stack(batch_size * [mu_lm.to(device=device)])
    shape = mu_lm
    lm_eigenvec = torch.from_numpy(data.lm_eigenvec).float().to(device=device)
    lm_eigenvec = torch.stack(batch_size * [lm_eigenvec])

    M = data.M
    N = data.N

    # main training loop
    for epoch in itertools.count():
        for j,batch in enumerate(loader):

            # get the input and gt values
            x_cam_gt = batch['x_cam_gt'].to(device=device)
            shape_gt = batch['x_w_gt'].to(device=device)
            fgt = batch['f_gt'].to(device=device)
            x_img = batch['x_img'].to(device=device)
            x_img_gt = batch['x_img_gt'].to(device=device).permute(0,2,1,3)

            # Actual size of this batch; may be smaller than data.batchsize.
            batch_size = fgt.shape[0]

            # The tensors below are not consumed by the supervised branch.
            one = torch.ones(batch_size,M*N,1).to(device=device)
            x_img_one = torch.cat([x_img,one],dim=2)
            # M*N replaces the hard-coded 6800 of the original.
            x_cam_pt = x_cam_gt.permute(0,1,3,2).reshape(batch_size,M*N,3)
            x = x_img.permute(0,2,1)

            ptsI = x_img_one.reshape(batch_size,M,N,3).permute(0,1,3,2)[:,:,:2,:]

            # if just optimizing
            if not opt:
                # calibration
                f = calib_net(x) + 300
                K = torch.zeros((batch_size,3,3)).float().to(device=device)
                K[:,0,0] = f.squeeze()
                K[:,1,1] = f.squeeze()
                K[:,2,2] = 1

                # sfm
                betas = sfm_net(x)
                betas = betas.unsqueeze(-1)
                # Slice the pre-stacked constants so a short final batch
                # still lines up with the network output.
                shape = mu_lm[:batch_size] + torch.bmm(
                    lm_eigenvec[:batch_size], betas
                ).squeeze().view(batch_size,N,3)

                opt1.zero_grad()
                opt2.zero_grad()
                # NOTE(review): if f and fgt differ in rank this subtraction
                # broadcasts - confirm their shapes.
                f_error = torch.mean(torch.abs(f - fgt))
                error3d = torch.mean(torch.abs(shape - shape_gt))
                error = f_error + error3d
                error.backward()
                opt1.step()
                opt2.step()

                # Show up to four predictions; the original indexed f[0..3]
                # unconditionally and failed on smaller batches.
                samples = " | ".join(
                    f"f/fgt: {f[k].item():.1f}/{fgt[k].item():.1f}"
                    for k in range(min(4, batch_size)))
                print(f"f_error: {f_error.item():.3f} | error3d: {error3d.item():.3f} | {samples} ")
                continue

            # get shape error from image projection
            # NOTE(review): on this path f, rmse, f_error, loss1 and loss2
            # are never assigned in the visible code, so this line raises.
            # The optimisation branch appears to be missing - confirm.
            print(f"f/fgt: {f[0].item():.3f}/{fgt[0].item():.3f} | rmse: {rmse:.3f} | f_rel: {f_error.item():.4f}  | loss1: {loss1.item():.3f} | loss2: {loss2.item():.3f}")

        # save model and increment weight decay
        print("saving!")
        torch.save(sfm_net.state_dict(), os.path.join('model','sfm_'+modelout))
        torch.save(calib_net.state_dict(), os.path.join('model','calib_'+modelout))
        test(modelin=args.out,outfile=args.out,optimize=False)
def startcustomtraining(args, io):
    """Train a classifier on the FT10 dataset and evaluate it on FT11.

    Each epoch trains on ``FT10``, evaluates on ``FT11``, logs loss /
    accuracy / balanced accuracy through ``io.cprint`` and saves the model
    ``state_dict`` to ``pretrained/custommodel.t7`` whenever the test
    accuracy strictly improves.  After the last epoch the per-epoch train and
    test accuracies are plotted, written to ``accuracy.png`` and shown.

    Args:
        args: Namespace read for ``num_points``, ``test_batch_size``,
            ``cuda``, ``model``, ``use_sgd``, ``lr``, ``momentum`` and
            ``epochs``.
        io: Logger exposing ``cprint(str)``.

    Raises:
        Exception: If ``args.model`` is neither ``'pointnet'`` nor
            ``'dgcnn'``.
    """
    # NOTE(review): the training loader also uses args.test_batch_size -
    # confirm this is intended.
    ft_loader = DataLoader(
        FT10(num_points=args.num_points),
        num_workers=8, batch_size=args.test_batch_size,
        shuffle=True, drop_last=True)
    ft_test_loader = DataLoader(
        FT11(num_points=args.num_points),
        num_workers=8, batch_size=args.test_batch_size,
        shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss

    best_ft_test_acc = 0.0
    train_accs = []
    test_accs = []
    epochs = []

    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        ft_loss = 0.0
        count = 0
        model.train()
        ft_pred = []
        ft_true = []
        for data, label in ft_loader:
            # reshape(-1) instead of squeeze(): squeeze() would also remove
            # the batch axis of a single-sample batch.  Assumes one class
            # index per sample, as the accuracy computation below does.
            data, label = data.to(device), label.to(device).reshape(-1)
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            ft_loss += loss.item() * batch_size
            ft_true.append(label.cpu().numpy())
            ft_pred.append(preds.detach().cpu().numpy())
        ft_true = np.concatenate(ft_true)
        ft_pred = np.concatenate(ft_pred)
        ft_acc = metrics.accuracy_score(ft_true, ft_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(ft_true, ft_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, ft_loss * 1.0 / count, ft_acc, avg_per_class_acc)
        io.cprint(outstr)
        train_accs.append(ft_acc)

        ####################
        # Test
        ####################
        ft_test_loss = 0.0
        count = 0
        model.eval()
        ft_test_pred = []
        ft_test_true = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data, label in ft_test_loader:
                data, label = data.to(device), label.to(device).reshape(-1)
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                ft_test_loss += loss.item() * batch_size
                ft_test_true.append(label.cpu().numpy())
                ft_test_pred.append(preds.cpu().numpy())
        ft_test_true = np.concatenate(ft_test_true)
        ft_test_pred = np.concatenate(ft_test_pred)
        ft_test_acc = metrics.accuracy_score(ft_test_true, ft_test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            ft_test_true, ft_test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, ft_test_loss * 1.0 / count, ft_test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if ft_test_acc > best_ft_test_acc:
            print('save now')
            best_ft_test_acc = ft_test_acc
            torch.save(model.state_dict(), 'pretrained/custommodel.t7')

        # Epochs are numbered from 1 on the plot's x axis.
        epochs.append(epoch + 1)
        test_accs.append(ft_test_acc)

        # Step the schedule once per epoch, after the optimizer updates;
        # stepping it first skips the initial learning-rate value.
        scheduler.step()

    fig, ax = plt.subplots()
    ax.plot(epochs, train_accs, color='blue', label='train acc')
    ax.plot(epochs, test_accs, color='red', label='test acc')
    ax.set(xlabel='epoch', ylabel='accuracy',
           title='accuracy values per epoch')
    ax.grid()
    ax.legend()
    fig.savefig("accuracy.png")
    plt.show()
# Script fragment: sets up logging, data loaders, model, optimizer and LR
# schedule, then starts the epoch loop.  The remainder of the batch loop body
# lies outside this fragment.
writer = SummaryWriter('./output/runs/tersorboard')
torch.manual_seed(SEED)
# Use the first GPU listed in `gpus` when CUDA is available, else the CPU.
device = torch.device(f'cuda:{gpus[0]}' if torch.cuda.is_available() else 'cpu')
print("Loading train dataset...")
train_data = PointNetDataset(path, train=0)
# shuffle=True: samples are shuffled before being emitted
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
print("Loading valid dataset...")
#val_data = PointNetDataset("../../dataset/modelnet40_normal_resampled/", train=1)
val_data = PointNetDataset(path, train=1)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
print("Set model and optimizer...")
model = PointNet().to(device=device)
# Initialise the optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=decay_lr_every, gamma=decay_lr_factor)
best_acc = 0.0
model.train()
# %%
print("Start training...")
for epoch in range(epochs):
    # Per-epoch accumulators and start timestamp.
    acc_loss = 0.0
    num_samples = 0
    start_tic = time.time()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
def train(args, io):
    """Train a classifier on ModelNet40 with optional resume from a checkpoint.

    Each epoch trains on the ``'train'`` partition, steps the ``StepLR``
    schedule, writes a rolling checkpoint every tenth epoch, evaluates on the
    ``'test'`` partition and writes a "best" checkpoint whenever the test
    accuracy matches or beats the best value seen so far.  Metrics are logged
    through ``io.cprint``.

    Checkpoints are dictionaries with the keys ``epoch``, ``arch``,
    ``state_dict`` and ``opt``, plus ``scheduler`` and ``best_test_acc``.
    The last two are optional when loading, so older checkpoints still
    resume.

    NOTE(review): the source this was recovered from had lost its
    indentation.  ``scheduler.step()`` is placed once per epoch, after the
    batch loop - confirm.

    Args:
        args: Namespace read for ``num_points``, ``batch_size``,
            ``test_batch_size``, ``cuda``, ``model``, ``use_sgd``, ``lr``,
            ``momentum``, ``resume``, ``start_epoch``, ``epochs`` and
            ``exp_name``.  ``start_epoch`` is overwritten when a checkpoint
            is loaded.
        io: Logger exposing ``cprint(str)``.

    Raises:
        Exception: If ``args.model`` is not ``'pointnet'``, ``'dgcnn'`` or
            ``'semigcn'``.
    """
    train_loader = DataLoader(
        ModelNet40(partition='train', num_points=args.num_points),
        num_workers=8, batch_size=args.batch_size,
        shuffle=True, drop_last=True)
    test_loader = DataLoader(
        ModelNet40(partition='test', num_points=args.num_points),
        num_workers=8, batch_size=args.test_batch_size,
        shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'semigcn':
        model = SemiGCN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr*100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    # optionally resume from a checkpoint
    checkpoint = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # NOTE: torch.load unpickles the file - only resume from trusted
            # checkpoints.  map_location lets a checkpoint saved on another
            # device be restored onto the one selected above.
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            opt.load_state_dict(checkpoint['opt'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.8)
    best_test_acc = 0
    if checkpoint is not None:
        # Restore the schedule position and the best score when the
        # checkpoint carries them, so a resumed run neither restarts the
        # decay counter nor overwrites the best checkpoint with a worse one.
        if 'scheduler' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler'])
        best_test_acc = checkpoint.get('best_test_acc', best_test_acc)

    criterion = cal_loss

    def _snapshot(next_epoch):
        # Everything needed to resume training at `next_epoch`.
        return {'epoch': next_epoch,
                'arch': args.model,
                'state_dict': model.state_dict(),
                'opt': opt.state_dict(),
                'scheduler': scheduler.state_dict(),
                'best_test_acc': best_test_acc}

    for epoch in range(args.start_epoch, args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            # reshape(-1) instead of squeeze(): squeeze() would also remove
            # the batch axis of a single-sample batch.  Assumes one class
            # index per sample, as the accuracy computation below does.
            data, label = data.to(device), label.to(device).reshape(-1)
            data = data.permute(0, 2, 1)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss*1.0/count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)
        if epoch%10 == 0:
            # save running checkpoint per 10 epoch
            torch.save(_snapshot(epoch + 1),
                       'checkpoints/%s/models/checkpoint_latest.pth.tar' % args.exp_name)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).reshape(-1)
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss*1.0/count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(_snapshot(epoch + 1),
                       'checkpoints/%s/models/checkpoint_best.pth.tar' % args.exp_name)
# Script fragment: builds the ModelNet40 loaders, a PointNet on the GPU and an
# SGD optimizer, then starts the epoch loop.  The remainder of the batch loop
# body lies outside this fragment.
from torch.utils.data import DataLoader
train_loader = DataLoader(ModelNet40(partition='train', num_points=1024), num_workers=8,
                          batch_size=32, shuffle=True, drop_last=True)
test_loader = DataLoader(ModelNet40(partition='test', num_points=1024), num_workers=8,
                         batch_size=32, shuffle=True, drop_last=False)

model = PointNet().cuda()
criterion = nn.CrossEntropyLoss()
opt = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
# Log sink for this run.
io = IOStream('checkpoints/run.log')

for epoch in range(100):
    #################
    #Train
    #################
    # Per-epoch accumulators.
    train_loss = 0.0
    count = 0.0
    model.train()
    train_pred = []
    train_true = []
    for data, label in tqdm(train_loader):
        # Swap the last two axes of the batch before moving it to the GPU.
        data = data.permute(0, 2, 1)
        data, label = data.cuda(), label.squeeze().cuda()
        batch_size = data.size()[0]
        opt.zero_grad()
def train(args, io):
    """Train one of several point-cloud classifiers on ModelNet40.

    Supported ``args.model`` values are ``'pointnet'``, ``'dgcnn'``,
    ``'ssg'``, ``'msg'``, ``'ognet'`` and ``'ognet-small'``.  During the
    first ``args.warm_epoch`` epochs the training loss is multiplied by a
    warm-up factor that starts at 0.1 and grows by ``0.9 / warm_iteration``
    on every training step, capped at 1.0.  After each epoch the model is
    evaluated on the test partition, and its ``state_dict`` is saved whenever
    the sum of test accuracy and balanced accuracy matches or beats the best
    sum seen so far.  Metrics are logged through ``io.cprint``.

    Args:
        args: Namespace read for ``num_points``, ``batch_size``,
            ``test_batch_size``, ``cuda``, ``model``, ``use_sgd``, ``lr``,
            ``momentum``, ``epochs``, ``warm_epoch`` and ``exp_name``, plus
            the model-specific options used in the constructors below.
        io: Logger exposing ``cprint(str)``.

    Raises:
        Exception: If ``args.model`` is not one of the supported names.
    """
    train_loader = DataLoader(
        ModelNet40(partition='train', num_points=args.num_points),
        num_workers=8, batch_size=args.batch_size,
        shuffle=True, drop_last=True)
    test_loader = DataLoader(
        ModelNet40(partition='test', num_points=args.num_points),
        num_workers=8, batch_size=args.test_batch_size,
        shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    def _dense_kwargs():
        # Keyword arguments shared by the Model_dense / ModelE_dense variants.
        return dict(output_classes=40, init_points=768, input_dims=3,
                    dropout_prob=args.dropout, id_skip=args.id_skip,
                    drop_connect_rate=args.drop_connect_rate,
                    cluster='xyzrgb', pre_act=args.pre_act,
                    norm=args.norm_layer)

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'ssg':
        model = PointNet2SSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'msg':
        model = PointNet2MSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'ognet':
        # [64,128,256,512]
        # Build only the variant that is used; the original always built
        # Model_dense first and then replaced it.
        if args.efficient:
            model = ModelE_dense(20, args.feature_dims, [512],
                                 gem=args.gem, ASPP=args.ASPP,
                                 **_dense_kwargs())
        else:
            model = Model_dense(20, args.feature_dims, [512],
                                **_dense_kwargs())
        model.to(device)
    elif args.model == 'ognet-small':
        # [48,96,192,384]
        model = Model_dense(20, args.feature_dims, [512], **_dense_kwargs())
        model.to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs,
                                      eta_min=0.01 * args.lr)

    criterion = cal_loss

    best_test_acc = 0
    best_avg_per_class_acc = 0

    warm_up = 0.1  # We start from the 0.1*lrRate
    # Number of training steps covered by the warm-up epochs.  Reuse the
    # dataset already held by the loader instead of building a second copy.
    warm_iteration = round(
        len(train_loader.dataset) / args.batch_size) * args.warm_epoch

    # These models take the batch twice and in its original axis order.
    dual_input = args.model in ('ognet', 'ognet-small', 'ssg', 'msg')

    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            # reshape(-1) instead of squeeze(): squeeze() would also remove
            # the batch axis of a single-sample batch.  Assumes one class
            # index per sample, as the accuracy computation below does.
            data, label = data.to(device), label.to(device).reshape(-1)
            batch_size = data.size()[0]
            opt.zero_grad()
            if dual_input:
                logits = model(data, data)
            else:
                data = data.permute(0, 2, 1)
                logits = model(data)
            loss = criterion(logits, label)
            if epoch < args.warm_epoch:
                warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                loss = loss * warm_up
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            # During warm-up this accumulates the scaled loss.
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).reshape(-1)
                batch_size = data.size()[0]
                if dual_input:
                    logits = model(data, data)
                else:
                    data = data.permute(0, 2, 1)
                    logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc + avg_per_class_acc >= best_test_acc + best_avg_per_class_acc:
            best_test_acc = test_acc
            best_avg_per_class_acc = avg_per_class_acc
            print('This is the current best.')
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)

        # Step the schedule once per epoch, after the optimizer updates;
        # stepping it first skips the initial learning-rate value.
        scheduler.step()
def test(modelin=args.model,outfile=args.out,optimize=args.opt):
    """Evaluate the calibration and shape networks on synthetic test sets.

    For each focal length in 400, 500, ..., 1400 the first ten sequences of
    ``dataloader.TestLoader(f)`` are evaluated.  The per-sample and
    per-focal-length errors, predicted focal lengths and predicted shapes are
    written to ``outfile`` with ``scipy.io.savemat`` and the mean errors are
    printed.

    NOTE(review): the source this was recovered from had lost its
    indentation.  The nesting of the optimisation loops, and the placement of
    ``all_fpred.append`` after the ``if optimize`` / ``else`` branches, were
    inferred - confirm.

    Args:
        modelin: Suffix of the checkpoints ``model/calib_<modelin>`` and
            ``model/sfm_<modelin>``; ``""`` evaluates untrained networks.
        outfile: Path of the ``.mat`` file to write.
        optimize: When truthy, refine both networks on every sample by
            alternating a few Adam steps on each before measuring the errors.
    """
    # define model, dataloader, 3dmm eigenvectors, optimization method
    calib_net = PointNet(n=1)
    sfm_net = PointNet(n=199)
    calib_path = sfm_path = None
    if modelin != "":
        calib_path = os.path.join('model','calib_' + modelin)
        sfm_path = os.path.join('model','sfm_' + modelin)
        calib_net.load_state_dict(torch.load(calib_path))
        sfm_net.load_state_dict(torch.load(sfm_path))
    calib_net.eval()
    sfm_net.eval()

    # mean shape and eigenvectors for 3dmm
    data3dmm = dataloader.SyntheticLoader()
    mu_lm = torch.from_numpy(data3dmm.mu_lm).float().detach()
    mu_lm[:,2] = mu_lm[:,2]*-1
    lm_eigenvec = torch.from_numpy(data3dmm.lm_eigenvec).float().detach()
    sigma = torch.from_numpy(data3dmm.sigma).float().detach()
    sigma = torch.diag(sigma.squeeze())
    lm_eigenvec = torch.mm(lm_eigenvec, sigma)

    # Views per sequence and landmarks per view, as hard-coded in the
    # original.
    M = 100
    N = 68
    # Each inner optimisation loop runs 5 update steps; the 6th pass only
    # recomputes the loss before leaving the loop.
    inner_steps = 5

    # sample from f testing set
    allerror_2d = []
    allerror_3d = []
    allerror_rel3d = []
    all_f = []
    all_fpred = []
    all_depth = []
    out_shape = []
    out_f = []

    seterror_3d = []
    seterror_rel3d = []
    seterror_relf = []
    seterror_2d = []
    f_vals = [i*100 for i in range(4,15)]
    for f_test in f_vals:
        # create dataloader
        loader = dataloader.TestLoader(f_test)

        f_pred = []
        shape_pred = []
        error_2d = []
        error_3d = []
        error_rel3d = []
        error_relf = []
        for j,data in enumerate(loader):
            if j == 10: break
            # load the data
            x_cam_gt = data['x_cam_gt']
            shape_gt = data['x_w_gt']
            fgt = data['f_gt']
            x_img = data['x_img']
            T_gt = data['T_gt']

            all_depth.append(np.mean(T_gt[:,2]))
            all_f.append(fgt.numpy()[0])

            ptsI = x_img.reshape((M,N,2)).permute(0,2,1)
            x = x_img.unsqueeze(0).permute(0,2,1)

            # run the model
            f = calib_net(x) + 300
            betas = sfm_net(x)
            betas = betas.squeeze(0).unsqueeze(-1)
            shape = mu_lm + torch.mm(lm_eigenvec,betas).squeeze().view(N,3)

            # additional optimization on initial solution
            if optimize:
                # Start every sample from the stored weights when available.
                if calib_path is not None:
                    calib_net.load_state_dict(torch.load(calib_path))
                    sfm_net.load_state_dict(torch.load(sfm_path))
                calib_net.train()
                sfm_net.train()

                opt1 = torch.optim.Adam(calib_net.parameters(),lr=1e-4)
                opt2 = torch.optim.Adam(sfm_net.parameters(),lr=1e-2)
                curloss = 100
                for outerloop in itertools.count():

                    # camera calibration
                    shape = shape.detach()
                    for step in itertools.count():
                        opt1.zero_grad()
                        # A debug `print(x.shape); quit()` stood here and
                        # terminated the process on the first pass.
                        f = calib_net(x) + 300
                        K = torch.zeros(3,3).float()
                        K[0,0] = f
                        K[1,1] = f
                        K[2,2] = 1

                        rmse = torch.norm(shape_gt - shape,dim=1).mean()

                        # differentiable PnP pose estimation
                        km,c_w,scaled_betas, alphas = util.EPnP(ptsI,shape,K)
                        Xc, R, T, mask = util.optimizeGN(km,c_w,scaled_betas,alphas,shape,ptsI,K)
                        error2d = util.getReprojError2(ptsI,shape,R,T,K,show=False,loss='l1')
                        error_time = util.getTimeConsistency(shape,R,T)

                        loss = error2d.mean() + 0.01*error_time
                        if step == inner_steps: break
                        loss.backward()
                        opt1.step()
                        print(f"iter: {step} | error: {loss.item():.3f} | f/fgt: {f.item():.1f}/{fgt[0].item():.1f} | error2d: {error2d.mean().item():.3f} | rmse: {rmse.item():.3f} ")

                    # sfm
                    f = f.detach()
                    for step in itertools.count():
                        opt2.zero_grad()

                        # shape prediction
                        betas = sfm_net(x)
                        shape = torch.sum(betas * lm_eigenvec,1)
                        shape = shape.reshape(68,3) + mu_lm
                        K = torch.zeros((3,3)).float()
                        K[0,0] = f
                        K[1,1] = f
                        K[2,2] = 1

                        rmse = torch.norm(shape_gt - shape,dim=1).mean().detach()

                        # differentiable PnP pose estimation
                        km,c_w,scaled_betas,alphas = util.EPnP(ptsI,shape,K)
                        Xc, R, T, mask = util.optimizeGN(km,c_w,scaled_betas,alphas,shape,ptsI,K)
                        error2d = util.getReprojError2(ptsI,shape,R,T,K,show=False,loss='l1')
                        loss = error2d.mean()
                        # The original also had a `prev_loss` early exit
                        # guarded by `iter > 10`; it could never trigger
                        # because the loop always stops here first.
                        if step == inner_steps: break
                        loss.backward()
                        opt2.step()
                        print(f"iter: {step} | error: {loss.item():.3f} | f/fgt: {f.item():.1f}/{fgt[0].item():.1f} | error2d: {error2d.mean().item():.3f} | rmse: {rmse.item():.3f} ")

                    # closing condition for outerloop on dual objective
                    if torch.abs(curloss - loss) < 0.01: break
                    curloss = loss.detach()
            else:
                K = torch.zeros(3,3).float()
                K[0,0] = f
                K[1,1] = f
                K[2,2] = 1
                km,c_w,scaled_betas,alphas = util.EPnP(ptsI,shape,K)
                Xc, R, T, mask = util.optimizeGN(km,c_w,scaled_betas,alphas,shape,ptsI,K)

            all_fpred.append(f.detach().numpy()[0])

            # get errors
            reproj_errors2 = util.getReprojError2(ptsI,shape,R,T,K,show=False)
            reproj_errors3 = torch.norm(shape_gt - shape,dim=1).mean()
            rel_errors = util.getRelReprojError3(x_cam_gt,shape,R,T)

            reproj_error = reproj_errors2.mean()
            reconstruction_error = reproj_errors3.mean()
            rel_error = rel_errors.mean()
            f_error = torch.abs(fgt - f) / fgt

            # save final prediction
            f_pred.append(f.detach().cpu().item())
            shape_pred.append(shape.detach().cpu().numpy())

            # 2D list gets the reprojection error and 3D list gets the
            # reconstruction error (the original had the two swapped).
            allerror_2d.append(reproj_error.detach().cpu().numpy())
            allerror_3d.append(reconstruction_error.detach().cpu().numpy())
            allerror_rel3d.append(rel_error.detach().cpu().numpy())
            error_2d.append(reproj_error.cpu().data.item())
            error_3d.append(reconstruction_error.cpu().data.item())
            error_rel3d.append(rel_error.cpu().data.item())
            error_relf.append(f_error.cpu().data.item())

            print(f"f/sequence: {f_test}/{j}  | f/fgt: {f[0].item():.3f}/{fgt.item():.3f} |  f_error_rel: {f_error.item():.4f}  | rmse: {reconstruction_error.item():.4f}  | rel rmse: {rel_error.item():.4f}    | 2d error: {reproj_error.item():.4f}")

        avg_2d = np.mean(error_2d)
        avg_rel3d = np.mean(error_rel3d)
        avg_3d = np.mean(error_3d)
        avg_relf = np.mean(error_relf)

        seterror_2d.append(avg_2d)
        seterror_3d.append(avg_3d)
        seterror_rel3d.append(avg_rel3d)
        seterror_relf.append(avg_relf)
        out_f.append(np.stack(f_pred))
        out_shape.append(np.stack(shape_pred,axis=0))
        # Report the average 2D error for this focal length (the original
        # printed the last sample's value here).
        print(f"f_error_rel: {avg_relf:.4f}  | rel rmse: {avg_rel3d:.4f}    | 2d error: {avg_2d:.4f} |  rmse: {avg_3d:.4f}  |")

    out_shape = np.stack(out_shape)
    out_f = np.stack(out_f)
    all_f = np.stack(all_f).flatten()
    all_fpred = np.stack(all_fpred).flatten()
    allerror_2d = np.stack(allerror_2d).flatten()
    allerror_3d = np.stack(allerror_3d).flatten()
    allerror_rel3d = np.stack(allerror_rel3d).flatten()

    matdata = {}
    matdata['fvals'] = np.array(f_vals)
    matdata['all_f'] = np.array(all_f)
    matdata['all_fpred'] = np.array(all_fpred)
    matdata['all_d'] = np.array(all_depth)
    matdata['error_2d'] = allerror_2d
    matdata['error_3d'] = allerror_3d
    matdata['error_rel3d'] = allerror_rel3d
    matdata['seterror_2d'] = np.array(seterror_2d)
    matdata['seterror_3d'] = np.array(seterror_3d)
    matdata['seterror_rel3d'] = np.array(seterror_rel3d)
    matdata['seterror_relf'] = np.array(seterror_relf)
    matdata['shape'] = np.stack(out_shape)
    matdata['f'] = np.stack(out_f)
    scipy.io.savemat(outfile,matdata)

    print(f"MEAN seterror_2d: {np.mean(seterror_2d)}")
    print(f"MEAN seterror_3d: {np.mean(seterror_3d)}")
    print(f"MEAN seterror_rel3d: {np.mean(seterror_rel3d)}")
    print(f"MEAN seterror_relf: {np.mean(seterror_relf)}")
def _biwiid_intrinsics(f):
    """Return a 3x3 float matrix with ``f`` at [0, 0] and [1, 1] and 1 at [2, 2]."""
    K = torch.zeros(3, 3).float()
    K[0, 0] = f
    K[1, 1] = f
    K[2, 2] = 1
    return K


def _biwiid_pose(ptsI, shape, K):
    """Run ``util.EPnP`` followed by ``util.optimizeGN`` and return ``(R, T)``."""
    km, c_w, scaled_betas, alphas = util.EPnP(ptsI, shape, K)
    _, R, T, _ = util.optimizeGN(km, c_w, scaled_betas, alphas, shape, ptsI, K)
    return R, T


def testBIWIID(modelin=args.model,outfile=args.out,optimize=args.opt):
    """Evaluate the calibration and shape networks on ``dataloader.BIWIIDLoader``.

    For every sample the focal length ``f`` and the 68-landmark shape are
    predicted, a pose is recovered with EPnP + Gauss-Newton, and the 2D
    reprojection error, relative 3D error and relative focal-length error are
    recorded.  Results are written to ``outfile`` with ``scipy.io.savemat``
    and the mean errors are printed.

    Args:
        modelin: Suffix of the checkpoint files ``model/calib_<modelin>`` and
            ``model/sfm_<modelin>``.  An empty string skips loading and keeps
            the freshly constructed weights.
        outfile: Destination path handed to ``scipy.io.savemat``.
        optimize: When truthy, both networks are fine-tuned per sample by
            alternating calibration and shape updates on the mean L1
            reprojection error.

    Notes:
        * The starting weights are snapshotted in memory and restored before
          each sample's initial forward pass.  Previously they were re-read
          from disk only *after* that pass (so every sample but the first
          started from the previous sample's fine-tuned weights, in train
          mode), and the reload raised ``NameError`` when ``modelin`` was
          empty because the checkpoint paths were never defined.
        * The time-consistency term that used to be evaluated in the
          calibration loop never entered the loss and is no longer computed.
    """
    import copy

    # Each inner loop performs this many optimizer steps, followed by one
    # extra forward pass whose f / shape / pose / loss are the values kept.
    inner_steps = 5
    N = 68

    # define model, dataloader, 3dmm eigenvectors, optimization method
    calib_net = PointNet(n=1)
    sfm_net = PointNet(n=199)
    if modelin != "":
        calib_path = os.path.join('model','calib_' + modelin)
        sfm_path = os.path.join('model','sfm_' + modelin)
        calib_net.load_state_dict(torch.load(calib_path))
        sfm_net.load_state_dict(torch.load(sfm_path))
    calib_net.eval()
    sfm_net.eval()

    if optimize:
        # Deep copies: state_dict() tensors alias the live parameters, which
        # the per-sample optimization below modifies in place.
        calib_init = copy.deepcopy(calib_net.state_dict())
        sfm_init = copy.deepcopy(sfm_net.state_dict())

    # mean shape and eigenvectors for 3dmm
    data3dmm = dataloader.SyntheticLoader()
    mu_lm = torch.from_numpy(data3dmm.mu_lm).float().detach()
    mu_lm[:,2] = mu_lm[:,2]*-1
    lm_eigenvec = torch.from_numpy(data3dmm.lm_eigenvec).float().detach()
    sigma = torch.from_numpy(data3dmm.sigma).float().detach()
    sigma = torch.diag(sigma.squeeze())
    lm_eigenvec = torch.mm(lm_eigenvec, sigma)

    # define loader
    loader = dataloader.BIWIIDLoader()
    f_pred = []
    shape_pred = []
    error_2d = []
    error_relf = []
    error_rel3d = []
    for idx in range(len(loader)):
        batch = loader[idx]
        x_cam_gt = batch['x_cam_gt']
        fgt = batch['f_gt']
        x_img = batch['x_img']
        x_img_gt = batch['x_img_gt']

        M = x_img_gt.shape[0]
        ptsI = x_img.reshape((M,N,2)).permute(0,2,1)
        x = ptsI.unsqueeze(0).permute(0,2,1,3)

        if optimize:
            # Every sample starts from the same weights, in eval mode.
            calib_net.load_state_dict(calib_init)
            sfm_net.load_state_dict(sfm_init)
            calib_net.eval()
            sfm_net.eval()

        # run the model
        f = calib_net(x) + 300
        betas = sfm_net(x)
        betas = betas.squeeze(0).unsqueeze(-1)
        shape = mu_lm + torch.mm(lm_eigenvec,betas).squeeze().view(N,3)

        # additional optimization on initial solution
        if optimize:
            calib_net.train()
            sfm_net.train()
            opt1 = torch.optim.Adam(calib_net.parameters(),lr=1e-4)
            opt2 = torch.optim.Adam(sfm_net.parameters(),lr=1e-2)
            curloss = 100
            while True:
                # camera calibration: shape is held fixed, only f is updated
                shape = shape.detach()
                for step in itertools.count():
                    opt1.zero_grad()
                    f = calib_net.forward2(x) + 300
                    K = _biwiid_intrinsics(f)

                    # differentiable PnP pose estimation
                    R, T = _biwiid_pose(ptsI, shape, K)
                    error2d = util.getReprojError2(ptsI,shape,R,T,K,show=False,loss='l1')
                    loss = error2d.mean()
                    if step == inner_steps:
                        break
                    loss.backward()
                    opt1.step()
                    print(f"iter: {step} | error: {loss.item():.3f} | f/fgt: {f.item():.1f}/{fgt[0].item():.1f} | error2d: {error2d.mean().item():.3f} ")

                # sfm: f is held fixed, only the shape coefficients are updated
                f = f.detach()
                for step in itertools.count():
                    opt2.zero_grad()

                    # shape prediction
                    betas = sfm_net.forward2(x)
                    shape = torch.sum(betas * lm_eigenvec,1)
                    shape = shape.reshape(68,3) + mu_lm
                    K = _biwiid_intrinsics(f)

                    # differentiable PnP pose estimation
                    R, T = _biwiid_pose(ptsI, shape, K)
                    error2d = util.getReprojError2(ptsI,shape,R,T,K,show=False,loss='l1')
                    loss = error2d.mean()
                    if step == inner_steps:
                        break
                    loss.backward()
                    opt2.step()
                    print(f"iter: {step} | error: {loss.item():.3f} | f/fgt: {f.item():.1f}/{fgt[0].item():.1f} | error2d: {error2d.mean().item():.3f} ")

                # closing condition for outerloop on dual objective
                if torch.abs(curloss - loss) < 0.01:
                    break
                curloss = loss
        else:
            K = _biwiid_intrinsics(f)
            R, T = _biwiid_pose(ptsI, shape, K)

        # get errors
        reproj_errors2 = util.getReprojError2(ptsI,shape,R,T,K)
        rel_errors = util.getRelReprojError3(x_cam_gt,shape,R,T)

        reproj_error = reproj_errors2.mean()
        rel_error = rel_errors.mean()
        f_error = torch.abs(fgt - f) / fgt

        # save final prediction
        f_pred.append(f.detach().cpu().item())
        shape_pred.append(shape.detach().cpu().numpy())
        error_2d.append(reproj_error.item())
        error_rel3d.append(rel_error.item())
        error_relf.append(f_error.item())

        print(f" f/fgt: {f[0].item():.3f}/{fgt.item():.3f} | f_error_rel: {f_error.item():.4f} | rel rmse: {rel_error.item():.4f} | 2d error: {reproj_error.item():.4f}")

    # prepare output file
    matdata = {}
    matdata['shape'] = np.stack(shape_pred)
    matdata['f'] = np.stack(f_pred)
    matdata['error_2d'] = np.array(error_2d)
    matdata['error_rel3d'] = np.array(error_rel3d)
    matdata['error_relf'] = np.array(error_relf)
    scipy.io.savemat(outfile,matdata)

    print(f"MEAN seterror_2d: {np.mean(error_2d)}")
    print(f"MEAN seterror_rel3d: {np.mean(error_rel3d)}")
    print(f"MEAN seterror_relf: {np.mean(error_relf)}")
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Loading train dataset...")
train_data = PointNetDataset(dataset_path, train=0)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

print("Loading valid dataset...")
val_data = PointNetDataset(dataset_path, train=1)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

print("Set model and optimizer...")
model = PointNet().to(device=device)
# lr is the learning rate, i.e. the step size of gradient descent.
optimizer = optim.Adam(model.parameters(), lr=lr)
# StepLR scales the learning rate by `gamma` every `step_size` scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=decay_lr_every,
    gamma=decay_lr_factor,
)

best_acc = 0.0
model.train()
print("Start trainning...")
for epoch in range(epochs):
    # Per-epoch accumulators and timer.
    acc_loss = 0.0
    num_samples = 0
    start_tic = time.time()
    for x, y in train_loader:
        # Move the batch onto the training device.
        x = x.to(device)
        y = y.to(device)
def _run_epoch(model, loader, criterion, device, opt=None):
    """Run one pass over ``loader``; train when ``opt`` is given, else evaluate.

    Returns ``(mean_loss, true_labels, predicted_labels)``, where the mean is
    weighted by batch size and the labels are concatenated NumPy arrays.
    """
    training = opt is not None
    model.train(training)
    loss_sum = 0.0
    count = 0.0
    true_chunks = []
    pred_chunks = []
    # Autograd is only needed while training; evaluation skips graph building.
    with torch.set_grad_enabled(training):
        for data, label in loader:
            data = data.to(device)
            # reshape(-1) instead of squeeze(): squeeze() collapses a batch of
            # one label to a 0-d tensor, which np.concatenate rejects.
            # NOTE(review): assumes one class index per sample, (B,) or (B, 1).
            label = label.to(device).reshape(-1)
            batch_size = data.size()[0]
            if training:
                opt.zero_grad()
            logits = model(data.float())
            loss = criterion(logits, label)
            if training:
                loss.backward()
                opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            loss_sum += loss.item() * batch_size
            true_chunks.append(label.cpu().numpy())
            pred_chunks.append(preds.detach().cpu().numpy())
    return loss_sum * 1.0 / count, np.concatenate(true_chunks), np.concatenate(pred_chunks)


def train(args, io):
    """Train ``PointNet`` on ModelNet40 and checkpoint the best test accuracy.

    Args:
        args: Namespace providing ``batch_size``, ``lr``, ``epochs`` and
            ``exp_name``.
        io: Logger exposing ``cprint(str)``; receives one line per train/test
            pass.

    Each epoch runs a training pass and an evaluation pass.  Whenever the test
    accuracy is at least the best seen so far, the weights are saved to
    ``checkpoints/<exp_name>/models/model.t7``.
    """
    train_loader = DataLoader(ModelNet40(partition='train'), num_workers=8,
                              batch_size=args.batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test'), num_workers=8,
                             batch_size=args.batch_size, shuffle=True, drop_last=False)

    # Prefer the first GPU, but do not crash on CPU-only machines.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = PointNet().to(device)
    print(str(model))
    print("Use Adam")
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss, train_true, train_pred = _run_epoch(
            model, train_loader, criterion, device, opt)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, train_loss,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss, test_true, test_pred = _run_epoch(
            model, test_loader, criterion, device)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(
            test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            print('Saving ckpt with acc: %f' % best_test_acc)

        # Step the scheduler after the epoch's optimizer updates, as PyTorch
        # requires since 1.1; stepping first skips the initial schedule value.
        scheduler.step()
def main(opt):
    """Train, validate and test a ``PointNet`` with early stopping, then run inference.

    Four dataset/loader pairs are built from ``opt.dataroot`` (train, valid,
    test, and an unshuffled "all" loader).  The per-epoch losses are written
    to ``OutputDir + '/loss.csv'``; afterwards the checkpoint at
    ``OutputDir + '/checkpoint.pt'`` is loaded and ``inference`` is run on the
    "all" loader.
    """

    def build_loader(flags, shuffle, drop_last):
        # Dataset first, then its loader, in the same order as written inline.
        dataset = BADataset(opt.dataroot, opt.L, *flags)
        return BADataloader(dataset, batch_size=opt.batchSize,
                            shuffle=shuffle, num_workers=opt.workers,
                            drop_last=drop_last)

    train_dataloader = build_loader((True, False, False), True, True)
    valid_dataloader = build_loader((False, True, False), True, True)
    test_dataloader = build_loader((False, False, True), True, True)
    all_dataloader = build_loader((False, False, False), False, False)

    net = PointNet(d0=opt.d0, d1=opt.d1, d2=opt.d2, d3=opt.d3,
                   d4=opt.d4, d5=opt.d5, d6=opt.d6)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)

    # One entry per completed epoch, per split.
    history = {'train_loss': [], 'valid_loss': [], 'test_loss': []}

    for epoch in range(0, opt.niter):
        history['train_loss'].append(
            train(epoch, train_dataloader, net, criterion, optimizer, opt))
        history['valid_loss'].append(
            valid(valid_dataloader, net, criterion, opt))
        history['test_loss'].append(
            test(test_dataloader, net, criterion, opt))

        # Early stopping is driven by the validation loss of this epoch.
        early_stopping(history['valid_loss'][-1], net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    n_epochs = len(history['train_loss'])
    df = pd.DataFrame({
        'epoch': list(range(1, n_epochs + 1)),
        'train_loss': history['train_loss'],
        'valid_loss': history['valid_loss'],
        'test_loss': history['test_loss']
    })
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, opt, OutputDir)
type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--num_points', type=int, default=4096, help='num of points to use') parser.add_argument('--dropout', type=float, default=0.5, help='dropout rate') parser.add_argument('--emb_dims', type=int, default=1024, metavar='N', help='Dimension of embeddings') parser.add_argument('--k', type=int, default=40, metavar='N', help='Num of nearest neighbors to use') args = parser.parse_args() # load models if args.model == 'pointnet': model = PointNet(args) elif args.model == 'dgcnn': model = DGCNN(args) print('#parameters %d' % sum([x.nelement() for x in model.parameters()]))
def softXEnt(prediction, real_class):
    """Cross-entropy between logits and soft (or one-hot) target distributions.

    Args:
        prediction: Raw class scores; classes are on dimension 1.
        real_class: Target probabilities with the same shape as ``prediction``.

    Returns:
        Scalar tensor: ``-sum(target * log_softmax(prediction))`` over
        dimension 1, averaged over the remaining dimensions.
    """
    log_probs = torch.log_softmax(prediction, dim=1)
    return -(real_class * log_probs).sum(dim=1).mean()


def get_eval_acc_results(model, data_loader, device):
    """Return the mean per-batch accuracy of ``model`` over ``data_loader``.

    The model is switched to eval mode and is left in eval mode; callers that
    keep training must call ``model.train()`` again.  Targets are
    one-hot/soft vectors and are reduced with ``argmax`` over axis 1.
    Returns ``0.0`` when the loader yields no batches.
    """
    model.eval()
    accs = []
    with torch.no_grad():
        for x, y in data_loader:
            x = x.to(device)
            y = y.to(device)

            out = model(x)
            pred_y = np.argmax(out.cpu().numpy(), axis=1)
            gt = np.argmax(y.cpu().numpy(), axis=1)

            accs.append(np.sum(pred_y == gt) / len(y))

    if not accs:
        return 0.0
    return float(np.mean(accs))


if __name__ == "__main__":
    writer = SummaryWriter('./output/runs/tersorboard')
    torch.manual_seed(SEED)
    device = torch.device(f'cuda:{gpus[0]}' if torch.cuda.is_available() else 'cpu')

    print("Loading train dataset...")
    train_data = PointNetDataset("../../../dataset/modelnet40_normal_resampled", train=0)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    print("Loading valid dataset...")
    val_data = PointNetDataset("../../../dataset/modelnet40_normal_resampled/", train=1)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)

    print("Set model and optimizer...")
    model = PointNet().to(device=device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=decay_lr_every, gamma=decay_lr_factor)

    best_acc = 0.0

    print("Start trainning...")
    # NOTE(review): global_step is incremented below but not initialised in
    # this block; it is presumably defined at module level -- confirm.
    for epoch in range(epochs):
        # Validation switches the model to eval mode, so training mode has to
        # be re-enabled at the start of every epoch.
        model.train()
        acc_loss = 0.0
        num_samples = 0
        start_tic = time.time()
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            out = model(x)
            loss = softXEnt(out, y)
            loss.backward()
            optimizer.step()

            # Weight by the actual batch size so the running mean stays
            # correct when the last batch is smaller than batch_size.
            acc_loss += y.shape[0] * loss.item()
            num_samples += y.shape[0]
            global_step += 1
            acc = np.sum(np.argmax(out.cpu().detach().numpy(), axis=1) == np.argmax(y.cpu().detach().numpy(), axis=1)) / len(y)

            if (global_step + 1) % show_every == 0:
                # ...log the running loss
                writer.add_scalar('training loss', acc_loss / num_samples, global_step)
                writer.add_scalar('training acc', acc, global_step)

        scheduler.step()
        print(f"loss at epoch {epoch}:{acc_loss / num_samples:.3f}, lr:{optimizer.state_dict()['param_groups'][0]['lr']: .6f}, time:{time.time() - start_tic: 4f}sec")

        if (epoch + 1) % val_every == 0:
            acc = get_eval_acc_results(model, val_loader, device)
            print("eval at epoch[" + str(epoch) + f"] acc[{acc:3f}]")
            writer.add_scalar('validing acc', acc, global_step)

            if acc > best_acc:
                best_acc = acc
                save_ckp(save_dir, model, optimizer, epoch, best_acc, date)

    # Trace in eval mode so the exported graph uses inference behaviour.
    model.eval()
    example = torch.randn(1, 3, 10000).to(device)
    traced_script_module = torch.jit.trace(model, example)
    traced_script_module.save("../output/traced_model.pt")