def train(args, io):
    """Train a point-cloud classifier (PointNet or DGCNN) on ModelNet40.

    Args:
        args: parsed CLI namespace; must provide num_points, batch_size,
            test_batch_size, cuda, model, use_sgd, lr, momentum, epochs,
            exp_name.
        io: logger exposing cprint().

    Side effects: writes the best-so-far weights to
    'checkpoints/<exp_name>/models/model.t7'.
    """
    train_loader = DataLoader(ModelNet40(partition='train', num_points=args.num_points),
                              num_workers=8, batch_size=args.batch_size,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points),
                             num_workers=8, batch_size=args.test_batch_size,
                             shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        # SGD starts at 100x the base lr; the cosine schedule anneals it
        # back down toward args.lr (eta_min below).
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)  # (B, N, 3) -> (B, 3, N) channels-first
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            # weight per-batch loss by batch size so the epoch average is exact
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        # Step the LR schedule AFTER the epoch's optimizer updates (required
        # ordering since PyTorch 1.1; stepping first skips the initial lr).
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch,
            train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # Inference only: no_grad avoids building autograd graphs during eval,
        # cutting memory use (the original tracked gradients here for nothing).
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).squeeze()
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
def train(modelin=args.model, modelout=args.out, device=args.device, opt=args.opt):
    """Jointly train the calibration and SfM networks on batched landmark data.

    NOTE(review): the default arguments are evaluated once at import time from
    the global `args`; kept as-is to preserve the call interface.

    Args:
        modelin: checkpoint suffix to resume from ('' = train from scratch).
        modelout: checkpoint suffix used when saving.
        device: torch device for all modules and tensors.
        opt: if truthy, skip the joint-training branch (see loop body).

    Side effects: saves 'model/sfm_<modelout>' and 'model/calib_<modelout>'
    every epoch and runs test() on them.
    """
    # define model, dataloader, 3dmm eigenvectors, optimization method
    calib_net = PointNet(n=1)    # regresses a single focal-length value
    sfm_net = PointNet(n=199)    # regresses 199 3DMM shape coefficients
    if modelin != "":
        calib_path = os.path.join('model', 'calib_' + modelin)
        sfm_path = os.path.join('model', 'sfm_' + modelin)
        pretrained1 = torch.load(calib_path)
        pretrained2 = torch.load(sfm_path)
        calib_dict = calib_net.state_dict()
        sfm_dict = sfm_net.state_dict()
        # keep only checkpoint entries that exist in the current architectures
        pretrained1 = {k: v for k, v in pretrained1.items() if k in calib_dict}
        pretrained2 = {k: v for k, v in pretrained2.items() if k in sfm_dict}
        calib_dict.update(pretrained1)
        sfm_dict.update(pretrained2)
        # BUG FIX: load the merged dicts, not the filtered checkpoints.
        # load_state_dict() is strict by default, so loading `pretrained1`
        # directly raises a missing-key error whenever the checkpoint does not
        # cover every parameter -- which is exactly the case the filter/update
        # dance above exists to handle. The original built calib_dict/sfm_dict
        # and then never used them.
        calib_net.load_state_dict(calib_dict)
        sfm_net.load_state_dict(sfm_dict)
    calib_net.to(device=device)
    sfm_net.to(device=device)
    opt1 = torch.optim.Adam(calib_net.parameters(), lr=1e-3)
    opt2 = torch.optim.Adam(sfm_net.parameters(), lr=1e-3)

    # dataloader
    data = dataloader.Data()
    loader = data.batchloader
    batch_size = data.batchsize

    # mean shape and eigenvectors for 3dmm
    mu_lm = torch.from_numpy(data.mu_lm).float()  # .to(device=device)
    mu_lm[:, 2] = mu_lm[:, 2] * -1  # flip z-axis of the mean landmark shape
    mu_lm = torch.stack(batch_size * [mu_lm.to(device=device)])
    shape = mu_lm
    lm_eigenvec = torch.from_numpy(data.lm_eigenvec).float().to(device=device)
    lm_eigenvec = torch.stack(batch_size * [lm_eigenvec])

    M = data.M
    N = data.N

    # main training loop
    for epoch in itertools.count():
        for j, batch in enumerate(loader):
            # get the input and gt values
            x_cam_gt = batch['x_cam_gt'].to(device=device)
            shape_gt = batch['x_w_gt'].to(device=device)
            fgt = batch['f_gt'].to(device=device)
            x_img = batch['x_img'].to(device=device)
            # beta_gt = batch['beta_gt'].to(device=device)
            # x_img_norm = batch['x_img_norm']
            x_img_gt = batch['x_img_gt'].to(device=device).permute(0, 2, 1, 3)
            batch_size = fgt.shape[0]

            one = torch.ones(batch_size, M * N, 1).to(device=device)
            x_img_one = torch.cat([x_img, one], dim=2)
            x_cam_pt = x_cam_gt.permute(0, 1, 3, 2).reshape(batch_size, 6800, 3)
            x = x_img.permute(0, 2, 1)
            # x = x_img.permute(0,2,1).reshape(batch_size,2,M,N)
            ptsI = x_img_one.reshape(batch_size, M, N, 3).permute(0, 1, 3, 2)[:, :, :2, :]

            # if just optimizing
            if not opt:
                # calibration: +300 offset keeps the predicted focal length
                # in a positive, plausible range
                f = calib_net(x) + 300
                K = torch.zeros((batch_size, 3, 3)).float().to(device=device)
                K[:, 0, 0] = f.squeeze()
                K[:, 1, 1] = f.squeeze()
                K[:, 2, 2] = 1

                # sfm: reconstruct shape = mean + eigenvectors @ betas
                betas = sfm_net(x)
                betas = betas.unsqueeze(-1)
                shape = mu_lm + torch.bmm(lm_eigenvec, betas).squeeze().view(batch_size, N, 3)

                opt1.zero_grad()
                opt2.zero_grad()
                f_error = torch.mean(torch.abs(f - fgt))
                # error2d = torch.mean(torch.abs(pred - x_img_gt))
                error3d = torch.mean(torch.abs(shape - shape_gt))
                error = f_error + error3d
                error.backward()
                opt1.step()
                opt2.step()

                # NOTE(review): indexing f[0..3] assumes batch_size >= 4 -- confirm.
                print(f"f_error: {f_error.item():.3f} | error3d: {error3d.item():.3f} | f/fgt: {f[0].item():.1f}/{fgt[0].item():.1f} | f/fgt: {f[1].item():.1f}/{fgt[1].item():.1f} | f/fgt: {f[2].item():.1f}/{fgt[2].item():.1f} | f/fgt: {f[3].item():.1f}/{fgt[3].item():.1f} ")
                continue

            # get shape error from image projection
            # NOTE(review): this path (opt truthy) references f, rmse, f_error,
            # loss1, loss2 which are never defined here -- the per-sample
            # optimization code appears to have been removed. Left unchanged;
            # it will raise NameError if reached.
            print(f"f/fgt: {f[0].item():.3f}/{fgt[0].item():.3f} | rmse: {rmse:.3f} | f_rel: {f_error.item():.4f} | loss1: {loss1.item():.3f} | loss2: {loss2.item():.3f}")

        # save model and increment weight decay
        print("saving!")
        torch.save(sfm_net.state_dict(), os.path.join('model', 'sfm_' + modelout))
        torch.save(calib_net.state_dict(), os.path.join('model', 'calib_' + modelout))
        test(modelin=args.out, outfile=args.out, optimize=False)
def startcustomtraining(args, io):
    """Fine-tune a classifier on the custom FT10/FT11 datasets and plot accuracy.

    Args:
        args: parsed CLI namespace (num_points, test_batch_size, cuda, model,
            use_sgd, lr, momentum, epochs).
        io: logger exposing cprint().

    Side effects: saves best weights to 'pretrained/custommodel.t7' and writes
    a per-epoch accuracy plot to 'accuracy.png'.
    """
    # NOTE(review): the TRAINING loader also uses args.test_batch_size --
    # looks intentional for small fine-tuning sets, but confirm.
    ft_loader = DataLoader(FT10(num_points=args.num_points), num_workers=8,
                           batch_size=args.test_batch_size, shuffle=True, drop_last=True)
    ft_test_loader = DataLoader(FT11(num_points=args.num_points), num_workers=8,
                                batch_size=args.test_batch_size, shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = cal_loss

    best_ft_test_acc = 0.0
    i = 0
    train_accs = []
    test_accs = []
    epochs = []
    for epoch in range(args.epochs):
        i += 1
        ####################
        # Train
        ####################
        ft_loss = 0.0
        count = 0
        model.train()
        ft_pred = []
        ft_true = []
        for data, label in ft_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)  # channels-first for the conv backbone
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            ft_loss += loss.item() * batch_size
            ft_true.append(label.cpu().numpy())
            ft_pred.append(preds.detach().cpu().numpy())
        # Step the LR schedule AFTER the epoch's optimizer updates (PyTorch >=1.1
        # ordering; the original stepped at the top of the epoch and skipped
        # the initial learning rate).
        scheduler.step()
        ft_true = np.concatenate(ft_true)
        ft_pred = np.concatenate(ft_pred)
        ft_acc = metrics.accuracy_score(ft_true, ft_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(ft_true, ft_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch, ft_loss * 1.0 / count, ft_acc, avg_per_class_acc)
        io.cprint(outstr)
        train_accs.append(ft_acc)

        ####################
        # Test
        ####################
        ft_test_loss = 0.0
        count = 0
        model.eval()
        ft_test_pred = []
        ft_test_true = []
        # no_grad: evaluation needs no autograd graph
        with torch.no_grad():
            for data, label in ft_test_loader:
                data, label = data.to(device), label.to(device).squeeze()
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                ft_test_loss += loss.item() * batch_size
                ft_test_true.append(label.cpu().numpy())
                ft_test_pred.append(preds.cpu().numpy())
        ft_test_true = np.concatenate(ft_test_true)
        ft_test_pred = np.concatenate(ft_test_pred)
        ft_test_acc = metrics.accuracy_score(ft_test_true, ft_test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(ft_test_true, ft_test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, ft_test_loss * 1.0 / count, ft_test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if ft_test_acc > best_ft_test_acc:
            print('save now')
            best_ft_test_acc = ft_test_acc
            torch.save(model.state_dict(), 'pretrained/custommodel.t7')
        epochs.append(i)
        test_accs.append(ft_test_acc)

    # plot train/test accuracy curves over the fine-tuning run
    fig, ax = plt.subplots()
    ax.plot(epochs, train_accs, color='blue', label='train acc')
    ax.plot(epochs, test_accs, color='red', label='test acc')
    ax.set(xlabel='epoch', ylabel='accuracy', title='accuracy values per epoch')
    ax.grid()
    ax.legend()
    fig.savefig("accuracy.png")
    plt.show()
def train(args, io):
    """Train a classifier (PointNet/DGCNN/SemiGCN) on ModelNet40 with
    checkpoint/resume support.

    Args:
        args: parsed CLI namespace (num_points, batch_size, test_batch_size,
            cuda, model, use_sgd, lr, momentum, resume, start_epoch, epochs,
            exp_name).
        io: logger exposing cprint().

    Side effects: writes 'checkpoint_latest.pth.tar' every 10 epochs and
    'checkpoint_best.pth.tar' on test-accuracy improvement under
    'checkpoints/<exp_name>/models/'.
    """
    train_loader = DataLoader(ModelNet40(partition='train', num_points=args.num_points),
                              num_workers=8, batch_size=args.batch_size,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points),
                             num_workers=8, batch_size=args.test_batch_size,
                             shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'semigcn':
        model = SemiGCN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # checkpoint was saved from the DataParallel-wrapped model, so it
            # loads back into the wrapped model here
            model.load_state_dict(checkpoint['state_dict'])
            opt.load_state_dict(checkpoint['opt'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # StepLR chosen over the cosine/exponential alternatives tried earlier.
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.8)  # 0.7
    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.start_epoch, args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            data = data.permute(0, 2, 1)  # (B, N, 3) -> (B, 3, N)
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        # once per epoch, after the optimizer updates (PyTorch >=1.1 ordering)
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch,
            train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        if epoch % 10 == 0:
            # save running checkpoint per 10 epoch
            torch.save({'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'opt': opt.state_dict()},
                       'checkpoints/%s/models/checkpoint_latest.pth.tar' % args.exp_name)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # no_grad: skip autograd bookkeeping during evaluation (the original
        # tracked gradients here, wasting memory)
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).squeeze()
                data = data.permute(0, 2, 1)
                batch_size = data.size()[0]
                logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save({'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'opt': opt.state_dict()},
                       'checkpoints/%s/models/checkpoint_best.pth.tar' % args.exp_name)
def train(args, io):
    """Train a ModelNet40 classifier (PointNet/DGCNN/PointNet++/OGNet) with
    linear loss warm-up over the first args.warm_epoch epochs.

    Args:
        args: parsed CLI namespace (num_points, batch_size, test_batch_size,
            cuda, model, feature_dims, dropout, id_skip, drop_connect_rate,
            pre_act, norm_layer, efficient, gem, ASPP, use_sgd, lr, momentum,
            epochs, warm_epoch, exp_name).
        io: logger exposing cprint().

    Side effects: saves the best weights (by test acc + avg per-class acc) to
    'checkpoints/<exp_name>/models/model.t7'.
    """
    # models that take two inputs (points, features) instead of a single
    # channels-first tensor
    DUAL_INPUT_MODELS = ('ognet', 'ognet-small', 'ssg', 'msg')

    train_loader = DataLoader(ModelNet40(partition='train', num_points=args.num_points),
                              num_workers=8, batch_size=args.batch_size,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test', num_points=args.num_points),
                             num_workers=8, batch_size=args.test_batch_size,
                             shuffle=True, drop_last=False)

    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    elif args.model == 'ssg':
        model = PointNet2SSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'msg':
        model = PointNet2MSG(output_classes=40, dropout_prob=args.dropout)
        model.to(device)
    elif args.model == 'ognet':
        # [64,128,256,512]
        model = Model_dense(20, args.feature_dims, [512], output_classes=40,
                            init_points=768, input_dims=3,
                            dropout_prob=args.dropout, id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb', pre_act=args.pre_act,
                            norm=args.norm_layer)
        if args.efficient:
            model = ModelE_dense(20, args.feature_dims, [512], output_classes=40,
                                 init_points=768, input_dims=3,
                                 dropout_prob=args.dropout, id_skip=args.id_skip,
                                 drop_connect_rate=args.drop_connect_rate,
                                 cluster='xyzrgb', pre_act=args.pre_act,
                                 norm=args.norm_layer, gem=args.gem,
                                 ASPP=args.ASPP)
        model.to(device)
    elif args.model == 'ognet-small':
        # [48,96,192,384]
        model = Model_dense(20, args.feature_dims, [512], output_classes=40,
                            init_points=768, input_dims=3,
                            dropout_prob=args.dropout, id_skip=args.id_skip,
                            drop_connect_rate=args.drop_connect_rate,
                            cluster='xyzrgb', pre_act=args.pre_act,
                            norm=args.norm_layer)
        model.to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=0.01 * args.lr)

    criterion = cal_loss

    best_test_acc = 0
    best_avg_per_class_acc = 0
    warm_up = 0.1  # We start from the 0.1*lrRate
    # number of optimizer steps over which warm_up ramps from 0.1 to 1.0
    warm_iteration = round(len(ModelNet40(partition='train', num_points=args.num_points))
                           / args.batch_size) * args.warm_epoch  # first 5 epoch

    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            opt.zero_grad()
            if args.model in DUAL_INPUT_MODELS:
                logits = model(data, data)
            else:
                data = data.permute(0, 2, 1)
                logits = model(data)
            loss = criterion(logits, label)
            if epoch < args.warm_epoch:
                # linear warm-up: scale the loss up toward 1.0x over
                # warm_iteration steps to soften early updates
                warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                loss *= warm_up
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            # NOTE: during warm-up this logs the scaled loss, as the original did
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        # Step the LR schedule AFTER the epoch's optimizer updates (PyTorch
        # >=1.1 ordering; the original stepped first and skipped the initial lr).
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch,
            train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # no_grad: evaluation does not need autograd graphs
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).squeeze()
                batch_size = data.size()[0]
                if args.model in DUAL_INPUT_MODELS:
                    logits = model(data, data)
                else:
                    data = data.permute(0, 2, 1)
                    logits = model(data)
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        # selection criterion: sum of overall and per-class accuracy
        if test_acc + avg_per_class_acc >= best_test_acc + best_avg_per_class_acc:
            best_test_acc = test_acc
            best_avg_per_class_acc = avg_per_class_acc
            print('This is the current best.')
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
def train(args, io):
    """Train a PointNet classifier on ModelNet40 (single-GPU variant).

    Args:
        args: parsed CLI namespace (batch_size, lr, epochs, exp_name).
        io: logger exposing cprint().

    Side effects: saves the best weights to
    'checkpoints/<exp_name>/models/model.t7'.
    """
    train_loader = DataLoader(ModelNet40(partition='train'), num_workers=8,
                              batch_size=args.batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(ModelNet40(partition='test'), num_workers=8,
                             batch_size=args.batch_size, shuffle=True, drop_last=False)

    device = torch.device("cuda:0")

    model = PointNet().to(device)
    print(str(model))

    print("Use Adam")
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6)
    # NOTE(review): eta_min == args.lr makes this cosine schedule a no-op
    # (lr anneals from args.lr down to args.lr). Kept as in the original;
    # confirm whether eta_min was meant to be smaller.
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = cal_loss

    best_test_acc = 0
    for epoch in range(args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        for data, label in train_loader:
            data, label = data.to(device), label.to(device).squeeze()
            batch_size = data.size()[0]
            opt.zero_grad()
            logits = model(data.float())
            loss = criterion(logits, label)
            loss.backward()
            opt.step()
            preds = logits.max(dim=1)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(label.cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())
        # Step the LR schedule AFTER the epoch's optimizer updates (PyTorch
        # >=1.1 ordering; the original stepped at the top of the epoch).
        scheduler.step()
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f, train avg acc: %.6f' % (
            epoch,
            train_loss * 1.0 / count,
            metrics.accuracy_score(train_true, train_pred),
            metrics.balanced_accuracy_score(train_true, train_pred))
        io.cprint(outstr)

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # no_grad: inference only, skip autograd bookkeeping
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device).squeeze()
                batch_size = data.size()[0]
                logits = model(data.float())
                loss = criterion(logits, label)
                preds = logits.max(dim=1)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(label.cpu().numpy())
                test_pred.append(preds.cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        avg_per_class_acc = metrics.balanced_accuracy_score(test_true, test_pred)
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, test avg acc: %.6f' % (
            epoch, test_loss * 1.0 / count, test_acc, avg_per_class_acc)
        io.cprint(outstr)
        if test_acc >= best_test_acc:
            best_test_acc = test_acc
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            print('Saving ckpt with acc: %f' % best_test_acc)