def train(cfg):
    train_ds = COCODataset(cfg.train_imgs_path, cfg.train_anno_path,
                           resize_size=cfg.resize_size)
    # NOTE: the training loader is built with shuffle=False; set shuffle=True
    # unless a fixed sample order is intended.
    train_dl = DataLoader(train_ds,
                          batch_size=cfg.batch_size,
                          shuffle=False,
                          num_workers=cfg.num_workers,
                          collate_fn=train_ds.collate_fn)
    if cfg.eval:
        eval_ds = COCODataset(cfg.eval_imgs_path, cfg.eval_anno_path,
                              resize_size=cfg.resize_size)
        eval_dl = DataLoader(eval_ds,
                             batch_size=max(cfg.batch_size // 2, 1),
                             num_workers=cfg.num_workers,
                             collate_fn=eval_ds.collate_fn)
    else:
        eval_dl = None

    model = CenterNet(cfg)
    if cfg.gpu:
        model = model.cuda()
    loss_func = Loss(cfg)

    trainer = Trainer(cfg, model, loss_func, train_dl, eval_dl)
    trainer.train()
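# The DataLoader above depends on a dataset-provided collate_fn because
# detection targets vary in size per image. Below is a minimal sketch of what
# such a method might do, assuming the dataset yields (image, boxes, labels)
# tuples; the padding scheme here is an assumption, not the repository's
# actual implementation.
def collate_fn(batch):
    imgs, boxes, labels = zip(*batch)
    # Pad every image in the batch to the largest height/width so they can
    # be stacked into one tensor; keep boxes/labels as per-image lists.
    max_h = max(img.shape[1] for img in imgs)
    max_w = max(img.shape[2] for img in imgs)
    padded = torch.zeros(len(imgs), 3, max_h, max_w)
    for i, img in enumerate(imgs):
        padded[i, :, :img.shape[1], :img.shape[2]] = img
    return padded, list(boxes), list(labels)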
def main():
    model = Model()
    # Load ImageNet-pretrained ResNet-152 weights, keeping only the keys that
    # exist in this model (partial state_dict loading).
    # pretrained_dict = torch.load('weights/resnet152-b121ed2d.pth',
    #                              map_location=lambda storage, loc: storage)
    pretrained_dict = torch.load('weights/resnet152-b121ed2d.pth')
    model_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

    data = VRIC_data.Data()
    loss = Loss()
    main = Main(model, loss, data)
    # main.evaluate_ai()

    if opt.mode == 'train':
        for epoch in range(1, opt.epoch + 1):
            print('\nepoch', epoch)
            outputs = main.train()
            # Plot the four loss terms and the six accuracy terms in visdom.
            vis.line(Y=np.column_stack((outputs[0], outputs[1], outputs[2], outputs[3])),
                     X=np.column_stack((epoch, epoch, epoch, epoch)),
                     win='Learning curve',
                     update='append',
                     opts={'title': 'Learning curve'})
            vis.line(Y=np.column_stack((outputs[4], outputs[5], outputs[6],
                                        outputs[7], outputs[8], outputs[9])),
                     X=np.column_stack((epoch, epoch, epoch, epoch, epoch, epoch)),
                     win='accuracy curve',
                     update='append',
                     opts={'title': 'accuracy curve'})
            # Save a checkpoint every epoch (the original guard
            # `epoch % 1 == 0` is always true).
            print('\nstart evaluate')
            os.makedirs('weights/AI_mgn', exist_ok=True)
            torch.save(model.state_dict(),
                       'weights/AI_mgn/modelv5_{}.pth'.format(epoch))

    if opt.mode == 'evaluate':
        print('start evaluate')
        model.load_state_dict(torch.load(opt.weight))
        main.evaluate()

    if opt.mode == 'aicity':
        print('start output txt files')
        model.load_state_dict(torch.load(opt.weight))
        main.AICity_evaluate()
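# `vis` is used above but never created in this snippet; it is presumably a
# module-level visdom client. A minimal sketch of the setup this code assumes
# (requires a running visdom server, e.g. `python -m visdom.server`):
import visdom
vis = visdom.Visdom()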
def train(cfg):
    train_ds = VOCDataset(cfg.root, mode=cfg.split, resize_size=cfg.resize_size)
    train_dl = DataLoader(train_ds,
                          batch_size=1,
                          shuffle=True,
                          num_workers=cfg.num_workers,
                          collate_fn=train_ds.collate_fn,
                          pin_memory=True)

    model = CenterNet(cfg)
    if cfg.gpu:
        model = model.cuda()
    loss_func = Loss(cfg)

    # Train for 100 epochs; decay the learning rate at 60% and 80% of the
    # total number of iterations.
    epoch = 100
    cfg.max_iter = len(train_dl) * epoch
    cfg.steps = (int(cfg.max_iter * 0.6), int(cfg.max_iter * 0.8))

    trainer = Trainer(cfg, model, loss_func, train_dl, None)
    trainer.train()
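# cfg.steps places the learning-rate milestones at 60% and 80% of training.
# A small hedged sketch of how Trainer might consume them; the gamma value
# and the function name are assumptions, not the repository's confirmed code.
def lr_at_iter(base_lr, it, steps, gamma=0.1):
    # Multiply the base LR by gamma once per milestone already passed.
    lr = base_lr
    for s in steps:
        if it >= s:
            lr *= gamma
    return lr

# Example: with len(train_dl) == 5000 and 100 epochs, max_iter == 500000 and
# steps == (300000, 400000), so lr_at_iter(1e-3, 350000, (300000, 400000))
# returns 1e-4.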
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 1024
    params["batch_size"] = args.batch_size
    params['use_gan'] = args.use_gan

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tb_logger = Logger(log_dir)

    train_dataset = PUNET_Dataset(h5_file_path=params["dataset_dir"])
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=train_dataset,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    G_model = Generator_recon(params)
    G_model.apply(xavier_init)
    G_model = torch.nn.DataParallel(G_model).to(device)
    D_model = torch.nn.DataParallel(Discriminator(params, in_channels=3)).to(device)
    G_model.train()
    D_model.train()

    optimizer_D = Adam(D_model.parameters(), lr=params["lr_D"], betas=(0.9, 0.999))
    optimizer_G = Adam(G_model.parameters(), lr=params["lr_G"], betas=(0.9, 0.999))
    D_scheduler = MultiStepLR(optimizer_D, [50, 80], gamma=0.2)
    G_scheduler = MultiStepLR(optimizer_G, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):
        D_scheduler.step()
        G_scheduler.step()
        for batch_id, (input_data, gt_data, radius_data) in enumerate(train_data_loader):
            optimizer_G.zero_grad()
            optimizer_D.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            start_t_batch = time.time()

            output_point_cloud = G_model(input_data)
            # Reconstruction-only pretraining: EMD between output and input.
            emd_loss = Loss_fn.get_emd_loss(output_point_cloud.permute(0, 2, 1),
                                            input_data.permute(0, 2, 1))
            total_G_loss = emd_loss
            total_G_loss.backward()
            optimizer_G.step()

            current_lr_D = optimizer_D.state_dict()['param_groups'][0]['lr']
            current_lr_G = optimizer_G.state_dict()['param_groups'][0]['lr']
            tb_logger.scalar_summary('emd_loss', emd_loss.item(), iter)
            tb_logger.scalar_summary('lr_D', current_lr_D, iter)
            tb_logger.scalar_summary('lr_G', current_lr_G, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {},{}:{}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e,
                batch_id + 1, len(train_data_loader),
                "total_G_loss", total_G_loss.item(),
                "iter time", (time.time() - start_t_batch))
            print(msg)

            if iter % params['model_save_interval'] == 0 and iter > 0:
                model_save_dir = os.path.join(params['model_save_dir'], params['exp_name'])
                if not os.path.exists(model_save_dir):
                    os.makedirs(model_save_dir)
                D_ckpt_model_filename = "D_iter_%d.pth" % iter
                G_ckpt_model_filename = "G_iter_%d.pth" % iter
                D_model_save_path = os.path.join(model_save_dir, D_ckpt_model_filename)
                G_model_save_path = os.path.join(model_save_dir, G_ckpt_model_filename)
                torch.save(D_model.module.state_dict(), D_model_save_path)
                torch.save(G_model.module.state_dict(), G_model_save_path)

            if iter % params['model_vis_interval'] == 0 and iter > 0:
                # Render predicted, ground-truth, and input clouds to TensorBoard.
                np_pcd = output_point_cloud.permute(0, 2, 1)[0].detach().cpu().numpy()
                img = (np.array(visualize_point_cloud(np_pcd)) * 255).astype(np.uint8)
                tb_logger.image_summary("images", img[np.newaxis, :], iter)

                gt_pcd = gt_data.permute(0, 2, 1)[0].detach().cpu().numpy()
                gt_img = (np.array(visualize_point_cloud(gt_pcd)) * 255).astype(np.uint8)
                tb_logger.image_summary("gt", gt_img[np.newaxis, :], iter)

                input_pcd = input_data.permute(0, 2, 1)[0].detach().cpu().numpy()
                input_img = (np.array(visualize_point_cloud(input_pcd)) * 255).astype(np.uint8)
                tb_logger.image_summary("input", input_img[np.newaxis, :], iter)

            iter += 1
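# `xavier_init` is applied to the generators above but not defined in this
# file. A common implementation, offered as an assumption about what the
# repository's helper does:
import torch.nn as nn

def xavier_init(m):
    # Xavier/Glorot initialization for conv and linear layers; other module
    # types keep their default initialization.
    if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Linear)):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)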
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 1024
    params["batch_size"] = args.batch_size
    params['use_gan'] = args.use_gan

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tb_logger = Logger(log_dir)

    train_dataset = PUNET_Dataset(h5_file_path=params["dataset_dir"],
                                  split_dir=params['train_split'])
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=train_dataset,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    G_model = Generator(params)
    G_model.apply(xavier_init)
    G_model = torch.nn.DataParallel(G_model).to(device)
    D_model = Discriminator(params, in_channels=3)
    D_model.apply(xavier_init)
    D_model = torch.nn.DataParallel(D_model).to(device)
    G_model.train()
    D_model.train()

    optimizer_D = Adam(D_model.parameters(), lr=params["lr_D"], betas=(0.9, 0.999))
    optimizer_G = Adam(G_model.parameters(), lr=params["lr_G"], betas=(0.9, 0.999))
    D_scheduler = MultiStepLR(optimizer_D, [50, 80], gamma=0.2)
    G_scheduler = MultiStepLR(optimizer_G, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):
        D_scheduler.step()
        G_scheduler.step()
        for batch_id, (input_data, gt_data, radius_data) in enumerate(train_data_loader):
            optimizer_G.zero_grad()
            optimizer_D.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            start_t_batch = time.time()

            output_point_cloud = G_model(input_data)
            repulsion_loss = Loss_fn.get_repulsion_loss(output_point_cloud.permute(0, 2, 1))
            uniform_loss = Loss_fn.get_uniform_loss(output_point_cloud.permute(0, 2, 1))
            emd_loss = Loss_fn.get_emd_loss(output_point_cloud.permute(0, 2, 1),
                                            gt_data.permute(0, 2, 1))

            if params['use_gan']:
                # Update the discriminator on fake and real point clouds.
                fake_pred = D_model(output_point_cloud.detach())
                d_loss_fake = Loss_fn.get_discriminator_loss_single(fake_pred, label=False)
                d_loss_fake.backward()
                optimizer_D.step()

                real_pred = D_model(gt_data.detach())
                d_loss_real = Loss_fn.get_discriminator_loss_single(real_pred, label=True)
                d_loss_real.backward()
                optimizer_D.step()
                d_loss = d_loss_real + d_loss_fake

                # Generator adversarial loss plus the geometric losses.
                fake_pred = D_model(output_point_cloud)
                g_loss = Loss_fn.get_generator_loss(fake_pred)
                total_G_loss = params['uniform_w'] * uniform_loss \
                    + params['emd_w'] * emd_loss \
                    + params['repulsion_w'] * repulsion_loss \
                    + params['gan_w'] * g_loss
            else:
                total_G_loss = params['emd_w'] * emd_loss \
                    + params['repulsion_w'] * repulsion_loss

            total_G_loss.backward()
            optimizer_G.step()

            current_lr_D = optimizer_D.state_dict()['param_groups'][0]['lr']
            current_lr_G = optimizer_G.state_dict()['param_groups'][0]['lr']
            tb_logger.scalar_summary('repulsion_loss', repulsion_loss.item(), iter)
            tb_logger.scalar_summary('uniform_loss', uniform_loss.item(), iter)
            tb_logger.scalar_summary('emd_loss', emd_loss.item(), iter)
            if params['use_gan']:
                tb_logger.scalar_summary('d_loss', d_loss.item(), iter)
                tb_logger.scalar_summary('g_loss', g_loss.item(), iter)
            tb_logger.scalar_summary('lr_D', current_lr_D, iter)
            tb_logger.scalar_summary('lr_G', current_lr_G, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {},{}:{}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e,
                batch_id + 1, len(train_data_loader),
                "total_G_loss", total_G_loss.item(),
                "iter time", (time.time() - start_t_batch))
            print(msg)
            iter += 1

        if (e + 1) % params['model_save_interval'] == 0 and e > 0:
            model_save_dir = os.path.join(params['model_save_dir'], params['exp_name'])
            if not os.path.exists(model_save_dir):
                os.makedirs(model_save_dir)
            D_ckpt_model_filename = "D_iter_%d.pth" % e
            G_ckpt_model_filename = "G_iter_%d.pth" % e
            D_model_save_path = os.path.join(model_save_dir, D_ckpt_model_filename)
            G_model_save_path = os.path.join(model_save_dir, G_ckpt_model_filename)
            torch.save(D_model.module.state_dict(), D_model_save_path)
            torch.save(G_model.module.state_dict(), G_model_save_path)
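# Loss.get_discriminator_loss_single and Loss.get_generator_loss are not
# shown in this file. PU-GAN-style networks commonly use a least-squares GAN
# objective; the sketch below follows that convention and is an assumption,
# not the repository's confirmed implementation.
import torch
import torch.nn.functional as F

def get_discriminator_loss_single(pred, label=True):
    # Push discriminator outputs toward 1 for real samples, 0 for fakes.
    target = torch.ones_like(pred) if label else torch.zeros_like(pred)
    return F.mse_loss(pred, target)

def get_generator_loss(fake_pred):
    # The generator is rewarded when the discriminator scores fakes as real.
    return F.mse_loss(fake_pred, torch.ones_like(fake_pred))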
def train(args):
    start_t = time.time()
    params = get_train_options()
    params["exp_name"] = args.exp_name
    params["patch_num_point"] = 256
    params["batch_size"] = args.batch_size

    if args.debug:
        params["nepoch"] = 2
        params["model_save_interval"] = 3
        params['model_vis_interval'] = 3

    log_dir = os.path.join(params["model_save_dir"], args.exp_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tb_logger = Logger(log_dir)

    train_dataset = PUGAN_Dataset(h5_file_path=params["dataset_dir"], npoint=256)
    num_workers = 4
    train_data_loader = data.DataLoader(dataset=train_dataset,
                                        batch_size=params["batch_size"],
                                        shuffle=True,
                                        num_workers=num_workers,
                                        pin_memory=True,
                                        drop_last=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ###########################################
    # Initialize generators and discriminators
    ###########################################
    G_AB = Generator(params)      # upsampler: sparse (A) -> dense (B)
    G_AB.apply(xavier_init)
    G_AB = torch.nn.DataParallel(G_AB).to(device)

    G_BA = Downsampler(params)    # downsampler: dense (B) -> sparse (A)
    G_BA.apply(xavier_init)
    G_BA = torch.nn.DataParallel(G_BA).to(device)

    D_A = Discriminator(params, in_channels=3)
    D_A.apply(xavier_init)
    D_A = torch.nn.DataParallel(D_A).to(device)

    D_B = Discriminator(params, in_channels=3)
    D_B.apply(xavier_init)
    D_B = torch.nn.DataParallel(D_B).to(device)

    #########################################
    # Optimizers and learning-rate schedulers
    #########################################
    optimizer_D_A = Adam(D_A.parameters(), lr=params["lr_D_A"], betas=(0.9, 0.999))
    optimizer_D_B = Adam(D_B.parameters(), lr=params["lr_D_B"], betas=(0.9, 0.999))
    optimizer_G_AB = Adam(G_AB.parameters(), lr=params["lr_G_AB"], betas=(0.9, 0.999))
    optimizer_G_BA = Adam(G_BA.parameters(), lr=params["lr_G_BA"], betas=(0.9, 0.999))
    D_A_scheduler = MultiStepLR(optimizer_D_A, [50, 80], gamma=0.2)
    G_AB_scheduler = MultiStepLR(optimizer_G_AB, [50, 80], gamma=0.2)
    D_B_scheduler = MultiStepLR(optimizer_D_B, [50, 80], gamma=0.2)
    G_BA_scheduler = MultiStepLR(optimizer_G_BA, [50, 80], gamma=0.2)

    Loss_fn = Loss()

    print("preparation time is %fs" % (time.time() - start_t))
    iter = 0
    for e in range(params["nepoch"]):
        for batch_id, (input_data, gt_data, radius_data) in enumerate(train_data_loader):
            G_AB.train()
            G_BA.train()
            D_A.train()
            D_B.train()
            optimizer_G_AB.zero_grad()
            optimizer_D_A.zero_grad()
            optimizer_G_BA.zero_grad()
            optimizer_D_B.zero_grad()

            input_data = input_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            gt_data = gt_data[:, :, 0:3].permute(0, 2, 1).float().cuda()
            start_t_batch = time.time()

            output_point_cloud_high = G_AB(input_data)   # A -> B
            output_point_cloud_low = G_BA(gt_data)       # B -> A

            ########
            # Losses
            ########
            repulsion_loss_AB = Loss_fn.get_repulsion_loss(output_point_cloud_high.permute(0, 2, 1))
            uniform_loss_AB = Loss_fn.get_uniform_loss(output_point_cloud_high.permute(0, 2, 1))
            repulsion_loss_BA = Loss_fn.get_repulsion_loss(output_point_cloud_low.permute(0, 2, 1))
            uniform_loss_BA = Loss_fn.get_uniform_loss(output_point_cloud_low.permute(0, 2, 1))
            emd_loss_AB = Loss_fn.get_emd_loss(output_point_cloud_high.permute(0, 2, 1),
                                               gt_data.permute(0, 2, 1))

            # Cycle losses: A -> B -> A and B -> A -> B
            recov_A = G_BA(output_point_cloud_high)
            ABA_repul_loss = Loss_fn.get_repulsion_loss(recov_A.permute(0, 2, 1))
            ABA_uniform_loss = Loss_fn.get_uniform_loss(recov_A.permute(0, 2, 1))
            recov_B = G_AB(output_point_cloud_low)
            BAB_repul_loss = Loss_fn.get_repulsion_loss(recov_B.permute(0, 2, 1))
            BAB_uniform_loss = Loss_fn.get_uniform_loss(recov_B.permute(0, 2, 1))
            BAB_emd_loss = Loss_fn.get_emd_loss(recov_B.permute(0, 2, 1),
                                                gt_data.permute(0, 2, 1))

            # G_AB loss.
            # NOTE: the generator output is detached before the discriminator,
            # so the adversarial term does not propagate gradients to G_AB;
            # remove .detach() if that is unintended.
            fake_pred_B = D_A(output_point_cloud_high.detach())
            g_AB_loss = Loss_fn.get_generator_loss(fake_pred_B)
            total_G_AB_loss = g_AB_loss * params['gan_w_AB'] \
                + BAB_repul_loss * params['repulsion_w_AB'] \
                + BAB_uniform_loss * params['uniform_w_AB'] \
                + BAB_emd_loss * params['emd_w_AB'] \
                + uniform_loss_AB * params['uniform_w_AB'] \
                + emd_loss_AB * params['emd_w_AB'] \
                + repulsion_loss_AB * params['repulsion_w_AB']
            total_G_AB_loss.backward()
            optimizer_G_AB.step()

            # G_BA loss (same .detach() caveat as above).
            fake_pred_A = D_B(output_point_cloud_low.detach())
            g_BA_loss = Loss_fn.get_generator_loss(fake_pred_A)
            total_G_BA_loss = g_BA_loss * params['gan_w_BA'] \
                + ABA_repul_loss * params['repulsion_w_BA'] \
                + repulsion_loss_BA * params['repulsion_w_BA']
            total_G_BA_loss.backward()
            optimizer_G_BA.step()

            # Discriminator A loss: real dense clouds vs. upsampled fakes
            # drawn from a replay buffer.
            fake_B_ = fake_A_buffer.push_and_pop(output_point_cloud_high)
            fake_pred_B = D_A(fake_B_.detach())
            d_A_loss_fake = Loss_fn.get_discriminator_loss_single(fake_pred_B, label=False)
            real_pred_B = D_A(gt_data.detach())
            d_A_loss_real = Loss_fn.get_discriminator_loss_single(real_pred_B, label=True)
            d_A_loss = d_A_loss_real + d_A_loss_fake
            d_A_loss.backward()
            optimizer_D_A.step()

            # Discriminator B loss: real sparse clouds vs. downsampled fakes.
            fake_A_ = fake_B_buffer.push_and_pop(output_point_cloud_low)
            fake_pred_A = D_B(fake_A_.detach())
            d_B_loss_fake = Loss_fn.get_discriminator_loss_single(fake_pred_A, label=False)
            real_pred_A = D_B(input_data.detach())
            d_B_loss_real = Loss_fn.get_discriminator_loss_single(real_pred_A, label=True)
            d_B_loss = d_B_loss_real + d_B_loss_fake
            d_B_loss.backward()
            optimizer_D_B.step()

            # Current learning rates
            current_lr_D_A = optimizer_D_A.state_dict()['param_groups'][0]['lr']
            current_lr_G_AB = optimizer_G_AB.state_dict()['param_groups'][0]['lr']
            current_lr_D_B = optimizer_D_B.state_dict()['param_groups'][0]['lr']
            current_lr_G_BA = optimizer_G_BA.state_dict()['param_groups'][0]['lr']

            tb_logger.scalar_summary('d_A_loss', d_A_loss.item(), iter)
            tb_logger.scalar_summary('g_AB_loss', g_AB_loss.item(), iter)
            tb_logger.scalar_summary('Total_G_AB_loss', total_G_AB_loss.item(), iter)
            tb_logger.scalar_summary('lr_D_A', current_lr_D_A, iter)
            tb_logger.scalar_summary('lr_G_AB', current_lr_G_AB, iter)
            tb_logger.scalar_summary('d_B_loss', d_B_loss.item(), iter)
            tb_logger.scalar_summary('g_BA_loss', g_BA_loss.item(), iter)
            tb_logger.scalar_summary('Total_G_BA_loss', total_G_BA_loss.item(), iter)
            tb_logger.scalar_summary('lr_D_B', current_lr_D_B, iter)
            tb_logger.scalar_summary('lr_G_BA', current_lr_G_BA, iter)

            msg = "{:0>8},{}:{}, [{}/{}], {}: {}, {}: {}, {}:{}, {}: {},{}: {}".format(
                str(datetime.timedelta(seconds=round(time.time() - start_t))),
                "epoch", e + 1,
                batch_id + 1, len(train_data_loader),
                "total_G_AB_loss", total_G_AB_loss.item(),
                "total_G_BA_loss", total_G_BA_loss.item(),
                "iter time", (time.time() - start_t_batch),
                "d_A_loss", d_A_loss.item(),
                "d_B_loss", d_B_loss.item())
            print(msg)
            iter += 1

        D_A_scheduler.step()
        G_AB_scheduler.step()
        D_B_scheduler.step()
        G_BA_scheduler.step()

        if (e + 1) % params['model_save_interval'] == 0 and e > 0:
            model_save_dir = os.path.join(params['model_save_dir'], params['exp_name'])
            if not os.path.exists(model_save_dir):
                os.makedirs(model_save_dir)
            D_A_ckpt_model_filename = "D_A_iter_%d.pth" % (e + 1)
            G_AB_ckpt_model_filename = "G_AB_iter_%d.pth" % (e + 1)
            D_B_ckpt_model_filename = "D_B_iter_%d.pth" % (e + 1)
            G_BA_ckpt_model_filename = "G_BA_iter_%d.pth" % (e + 1)
            model_ckpt_model_filename = "Cyclegan_iter_%d.pth" % (e + 1)
            D_A_model_save_path = os.path.join(model_save_dir, D_A_ckpt_model_filename)
            G_AB_model_save_path = os.path.join(model_save_dir, G_AB_ckpt_model_filename)
            D_B_model_save_path = os.path.join(model_save_dir, D_B_ckpt_model_filename)
            G_BA_model_save_path = os.path.join(model_save_dir, G_BA_ckpt_model_filename)
            model_all_path = os.path.join(model_save_dir, model_ckpt_model_filename)
            # Save one combined checkpoint (including optimizer states) plus
            # individual weight files for each network.
            torch.save({
                'G_AB_state_dict': G_AB.module.state_dict(),
                'G_BA_state_dict': G_BA.module.state_dict(),
                'D_A_state_dict': D_A.module.state_dict(),
                'D_B_state_dict': D_B.module.state_dict(),
                'optimizer_G_AB_state_dict': optimizer_G_AB.state_dict(),
                'optimizer_G_BA_state_dict': optimizer_G_BA.state_dict(),
                'optimizer_D_A_state_dict': optimizer_D_A.state_dict(),
                'optimizer_D_B_state_dict': optimizer_D_B.state_dict()
            }, model_all_path)
            torch.save(D_A.module.state_dict(), D_A_model_save_path)
            torch.save(G_AB.module.state_dict(), G_AB_model_save_path)
            torch.save(D_B.module.state_dict(), D_B_model_save_path)
            torch.save(G_BA.module.state_dict(), G_BA_model_save_path)
def main():
    print('------------')
    opt.manualSeed = random.randint(1, 100)  # pick a random seed
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    opt.num_objects = 13                 # number of object classes in the training data
    opt.num_points = 500                 # number of input point-cloud points
    opt.outf = 'trained_models/linemod'  # directory for saved models
    opt.log_dir = 'logs/linemod'         # directory for logs
    opt.repeat_epoch = 20                # how many times each epoch is repeated

    # Build the networks: PoseNet predicts an initial 6D pose,
    # PoseRefineNet iteratively refines it.
    estimator = PoseNet(num_points=opt.num_points, num_obj=opt.num_objects)
    print(estimator)
    estimator.to(device)
    refiner = PoseRefineNet(num_points=opt.num_points, num_obj=opt.num_objects)
    print(refiner)
    refiner.to(device)

    # Resume from checkpoints, if provided.
    if opt.resume_posenet != '':
        estimator.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_posenet)))
    if opt.resume_refinenet != '':
        refiner.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_refinenet)))
        opt.refine_start = True   # the refiner starts training immediately
        opt.decay_start = True    # decay has already kicked in
        opt.lr *= opt.lr_rate     # decay the learning rate
        opt.w *= opt.w_rate       # decay the loss weight
        opt.batch_size = int(opt.batch_size / opt.iteration)
        optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)    # optimize the refiner
    else:
        opt.refine_start = False  # refiner not training yet
        opt.decay_start = False   # decay not started yet
        optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)  # optimize PoseNet

    # Load the training and test datasets.
    dataset = PoseDataset('train', opt.num_points, True, opt.dataset_root,
                          opt.noise_trans, opt.refine_start)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True,
                                             num_workers=opt.workers)
    test_dataset = PoseDataset('test', opt.num_points, False, opt.dataset_root,
                               0.0, opt.refine_start)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
                                                  shuffle=False, num_workers=opt.workers)

    opt.sym_list = dataset.get_sym_list()                 # indices of symmetric objects
    opt.num_points_mesh = dataset.get_num_points_mesh()   # number of mesh sample points
    print('----------Dataset loaded!---------\nlength of the training set: {0}\n'
          'length of the testing set: {1}\nnumber of sample points on mesh: {2}\n'
          'symmetry object list: {3}'.format(len(dataset), len(test_dataset),
                                             opt.num_points_mesh, opt.sym_list))

    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)

    best_test = np.Inf   # best (lowest) test distance so far
    if opt.start_epoch == 1:
        # Starting fresh: remove the old logs.
        for log in os.listdir(opt.log_dir):
            os.remove(os.path.join(opt.log_dir, log))
    st_time = time.time()

    # Main training loop ---------------------------------------------------!
    for epoch in range(opt.start_epoch, opt.nepoch):
        logger = setup_logger('epoch%d' % epoch,
                              os.path.join(opt.log_dir, 'epoch_%d_log.txt' % epoch))
        logger.info('Train time {0}'.format(
            time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time))
            + ', Training started'))
        train_count = 0
        train_dis_avg = 0.0   # running average of the distance metric

        if opt.refine_start:
            # Once the refiner is training, PoseNet is kept in eval mode.
            estimator.eval()
            refiner.train()
        else:
            estimator.train()
        optimizer.zero_grad()

        for rep in range(opt.repeat_epoch):
            for i, data in enumerate(dataloader, 0):
                points, choose, img, target, model_points, idx = data
                # points:       point cloud computed from the depth map, in camera coordinates
                # choose:       indices of the selected points [bs, 1, 500]
                # img:          RGB crop from the bounding box
                # target:       model_points transformed by the ground-truth pose [bs, 500, 3]
                # model_points: point cloud of the object model in its reference frame
                # idx:          index of the training image
                points, choose, img, target, model_points, idx = \
                    points.to(device), choose.to(device), img.to(device), \
                    target.to(device), model_points.to(device), idx.to(device)

                # Predict the pose and the per-pixel embedding.
                pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
                # pred_r: rotations    [bs, 500, 4]
                # pred_t: translations [bs, 500, 3]
                # pred_c: confidences  [bs, 500, 1]
                loss, dis, new_points, new_target = criterion(
                    pred_r, pred_t, pred_c, target, model_points, idx,
                    points, opt.w, opt.refine_start)

                if opt.refine_start:
                    # Iteratively refine the pose and backpropagate the refined distance.
                    for ite in range(0, opt.iteration):
                        pred_r, pred_t = refiner(new_points, emb, idx)
                        dis, new_points, new_target = criterion_refine(
                            pred_r, pred_t, new_target, model_points, idx, new_points)
                        dis.backward()
                else:
                    loss.backward()

                train_dis_avg += dis.item()
                train_count += 1

                if train_count % opt.batch_size == 0:
                    logger.info('Train time {0} Epoch {1} Batch {2} Frame {3} Avg_dis:{4}'.format(
                        time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)),
                        epoch, int(train_count / opt.batch_size), train_count,
                        train_dis_avg / opt.batch_size))
                    optimizer.step()
                    optimizer.zero_grad()
                    train_dis_avg = 0.0

                if train_count != 0 and train_count % 1000 == 0:
                    # Save the current model.
                    if opt.refine_start:
                        torch.save(refiner.state_dict(),
                                   '{0}/pose_refine_model_current.pth'.format(opt.outf))
                    else:
                        torch.save(estimator.state_dict(),
                                   '{0}/pose_model_current.pth'.format(opt.outf))

        print('------------ epoch {0} train finish -----------'.format(epoch))

        # Evaluation
        logger = setup_logger('epoch%d test' % epoch,
                              os.path.join(opt.log_dir, 'epoch_%d_test_log.txt' % epoch))
        logger.info('Test time {0}'.format(
            time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time))
            + ', Testing started'))
        test_dis = 0.0
        test_count = 0
        estimator.eval()
        refiner.eval()

        for j, data in enumerate(test_dataloader, 0):
            points, choose, img, target, model_points, idx = data
            points, choose, img, target, model_points, idx = \
                points.to(device), choose.to(device), img.to(device), \
                target.to(device), model_points.to(device), idx.to(device)
            pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
            _, dis, new_points, new_target = criterion(
                pred_r, pred_t, pred_c, target, model_points, idx,
                points, opt.w, opt.refine_start)
            # If the refiner is training, evaluate it as well.
            if opt.refine_start:
                for ite in range(0, opt.iteration):
                    pred_r, pred_t = refiner(new_points, emb, idx)
                    dis, new_points, new_target = criterion_refine(
                        pred_r, pred_t, new_target, model_points, idx, new_points)
            test_dis += dis.item()
            logger.info('Test time {0} Test Frame No.{1} dis:{2}'.format(
                time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)),
                test_count, dis))
            test_count += 1

        test_dis = test_dis / test_count   # average distance over the test set
        logger.info('Test time {0} Epoch {1} Test finish avg_dis:{2}'.format(
            time.strftime('%Hh %Mm %Ss', time.gmtime(time.time() - st_time)),
            epoch, test_dis))

        if test_dis <= best_test:
            # New best model: save it.
            best_test = test_dis
            if opt.refine_start:
                torch.save(refiner.state_dict(),
                           '{0}/pose_refine_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
            else:
                torch.save(estimator.state_dict(),
                           '{0}/pose_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
            print('----------------test model save finished-------------------')

        # Decay the learning rate and loss weight once the distance drops
        # below the decay margin.
        if best_test < opt.decay_margin and not opt.decay_start:
            opt.decay_start = True
            opt.lr *= opt.lr_rate
            opt.w *= opt.w_rate
            optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)

        # Once the distance drops below the refine margin, switch to training
        # the refiner and rebuild the datasets/dataloaders with refine_start=True.
        if best_test < opt.refine_margin and not opt.refine_start:
            opt.refine_start = True
            opt.batch_size = int(opt.batch_size / opt.iteration)
            optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)
            dataset = PoseDataset('train', opt.num_points, True, opt.dataset_root,
                                  opt.noise_trans, opt.refine_start)
            dataloader = DataLoader(dataset, batch_size=1, shuffle=True,
                                    num_workers=opt.workers)
            test_dataset = PoseDataset('test', opt.num_points, False, opt.dataset_root,
                                       0.0, opt.refine_start)
            test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                                         num_workers=opt.workers)
            opt.sym_list = dataset.get_sym_list()
            opt.num_points_mesh = dataset.get_num_points_mesh()
            print('----------Dataset loaded!---------\nlength of the training set: {0}\n'
                  'length of the testing set: {1}\nnumber of sample points on mesh: {2}\n'
                  'symmetry object list: {3}'.format(len(dataset), len(test_dataset),
                                                     opt.num_points_mesh, opt.sym_list))
            criterion = Loss(opt.num_points_mesh, opt.sym_list)
            criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
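# setup_logger is imported from the project's utilities. A minimal sketch of
# what it needs to provide (a named, file-backed logger); the format string
# is an assumption:
import logging

def setup_logger(name, log_file, level=logging.INFO):
    # Create a named logger that appends formatted records to log_file.
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s : %(message)s'))
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger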
refiner.to(device)
estimator.load_state_dict(torch.load(opt.model))        # load PoseNet weights
refiner.load_state_dict(torch.load(opt.refine_model))   # load PoseRefineNet weights
estimator.eval()
refiner.eval()

# Load the dataset in 'eval' mode.
test_dataset = PoseDataset('eval', num_points, False, opt.dataset_root, 0.0, True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)
sym_list = test_dataset.get_sym_list()                  # indices of symmetric objects
num_points_mesh = test_dataset.get_num_points_mesh()    # 500
criterion = Loss(num_points_mesh, sym_list)
criterion_refine = Loss_refine(num_points_mesh, sym_list)

# Per-object success thresholds: 10% of each model's diameter, read from the
# models_info file.
diameter = []
meta_file = open(dataset_config_dir, 'r')
meta = yaml.load(meta_file, Loader=yaml.SafeLoader)
for obj in objlist:
    diameter.append(meta[obj]['diameter'] / 1000.0 * 0.1)

success_count = [0 for i in range(num_objects)]   # per-object count of successful poses
num_count = [0 for i in range(num_objects)]       # per-object count of evaluated samples
fw = open('{0}/eval_result_logs.txt'.format(output_result_dir), 'w')

for i, data in enumerate(test_dataloader, 0):     # iterate over the dataset
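    # The loop body is truncated here, but the bookkeeping above (diameter,
    # success_count, num_count) matches the standard LineMOD ADD evaluation:
    # a pose counts as correct when the average model-point distance is below
    # 10% of the object diameter (the 0.1 factor applied when filling
    # `diameter`). The sketch below is a hedged assumption of that per-sample
    # check, not the repository's confirmed loop body; it presumes the loop
    # unpacks `data` and computes a distance `dis` and object index `idx` as
    # in the training script above.
    # obj = idx[0].item()
    # num_count[obj] += 1
    # if dis < diameter[obj]:   # ADD(-S) below 10% of the model diameter
    #     success_count[obj] += 1
    #     fw.write('No.{0} Pass! Distance: {1}\n'.format(i, dis))
    # else:
    #     fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(i, dis))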