def eval_model(model, dataloader, eval_epoch=None, verbose=False): print("Start evaluation...") since = time.time() device = next(model.parameters()).device if eval_epoch is not None: model_path = str(Path(cfg.OUTPUT_PATH) / "params" / "params_{:04}.pt".format(eval_epoch)) print("Loading model parameters from {}".format(model_path)) model.load_state_dict(torch.load(model_path)) was_training = model.training model.eval() ds = dataloader.dataset ds.set_num_graphs(cfg.EVAL.num_graphs_in_matching_instance) classes = ds.classes cls_cache = ds.cls accs = torch.zeros(len(classes), device=device) f1_scores = torch.zeros(len(classes), device=device) for i, cls in enumerate(classes): if verbose: print("Evaluating class {}: {}/{}".format(cls, i, len(classes))) running_since = time.time() iter_num = 0 ds.set_cls(cls) acc_match_num = torch.zeros(1, device=device) acc_total_num = torch.zeros(1, device=device) tp = torch.zeros(1, device=device) fp = torch.zeros(1, device=device) fn = torch.zeros(1, device=device) for k, inputs in enumerate(dataloader): data_list = [_.cuda() for _ in inputs["images"]] points_gt = [_.cuda() for _ in inputs["Ps"]] n_points_gt = [_.cuda() for _ in inputs["ns"]] edges = [_.to("cuda") for _ in inputs["edges"]] perm_mat_list = [perm_mat.cuda() for perm_mat in inputs["gt_perm_mat"]] batch_num = data_list[0].size(0) iter_num = iter_num + 1 visualize = k == 0 and cfg.visualize visualization_params = {**cfg.visualization_params, **dict(string_info=cls, true_matchings=perm_mat_list)} with torch.set_grad_enabled(False): s_pred_list = model( data_list, points_gt, edges, n_points_gt, perm_mat_list, visualize_flag=visualize, visualization_params=visualization_params, ) _, _acc_match_num, _acc_total_num = matching_accuracy(s_pred_list[0], perm_mat_list[0]) _tp, _fp, _fn = get_pos_neg(s_pred_list[0], perm_mat_list[0]) acc_match_num += _acc_match_num acc_total_num += _acc_total_num tp += _tp fp += _fp fn += _fn if iter_num % cfg.STATISTIC_STEP == 0 and verbose: running_speed = cfg.STATISTIC_STEP * batch_num / (time.time() - running_since) print("Class {:<8} Iteration {:<4} {:>4.2f}sample/s".format(cls, iter_num, running_speed)) running_since = time.time() accs[i] = acc_match_num / acc_total_num f1_scores[i] = f1_score(tp, fp, fn) if verbose: print("Class {} acc = {:.4f} F1 = {:.4f}".format(cls, accs[i], f1_scores[i])) time_elapsed = time.time() - since print("Evaluation complete in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60)) model.train(mode=was_training) ds.cls = cls_cache print("Matching accuracy") for cls, single_acc, f1_sc in zip(classes, accs, f1_scores): print("{} = {:.4f}, {:.4f}".format(cls, single_acc, f1_sc)) print("average = {:.4f}, {:.4f}".format(torch.mean(accs), torch.mean(f1_scores))) return accs, f1_scores
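
# ---------------------------------------------------------------------------
# The function above accumulates TP/FP/FN counts via the repo helpers
# `get_pos_neg` and `f1_score`. Minimal sketches of what those helpers are
# assumed to compute, given how they are called here (illustrative only; the
# repo's actual implementations may differ and these use `_sketch` names to
# avoid shadowing the real imports):
# ---------------------------------------------------------------------------
def get_pos_neg_sketch(pmat_pred, pmat_gt):
    # pmat_pred, pmat_gt: 0/1 matching matrices of identical shape
    tp = (pmat_pred * pmat_gt).sum()        # predicted and correct
    fp = (pmat_pred * (1 - pmat_gt)).sum()  # predicted but wrong
    fn = ((1 - pmat_pred) * pmat_gt).sum()  # ground truth missed
    return tp, fp, fn


def f1_score_sketch(tp, fp, fn):
    # F1 = 2*TP / (2*TP + FP + FN); epsilon guards against empty classes
    return 2 * tp / (2 * tp + fp + fn + 1e-8)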
def eval_model(model, dataloader, eval_epoch=None, verbose=False):
    print('Start evaluation...')
    since = time.time()
    device = next(model.parameters()).device

    if eval_epoch is not None:
        model_path = str(Path(cfg.OUTPUT_PATH) / 'params' / 'params_{:04}.pt'.format(eval_epoch))
        print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

    was_training = model.training
    model.eval()

    ds = dataloader.dataset
    classes = ds.classes
    cls_cache = ds.cls
    lap_solver = hungarian

    accs = torch.zeros(len(classes), device=device)

    for i, cls in enumerate(classes):
        if verbose:
            print('Evaluating class {}: {}/{}'.format(cls, i, len(classes)))

        running_since = time.time()
        iter_num = 0

        ds.cls = cls
        acc_match_num = torch.zeros(1, device=device)
        acc_total_num = torch.zeros(1, device=device)

        for inputs in dataloader:
            if 'images' in inputs:
                data1, data2 = [_.cuda() for _ in inputs['images']]
                inp_type = 'img'
            elif 'features' in inputs:
                data1, data2 = [_.cuda() for _ in inputs['features']]
                inp_type = 'feat'
            else:
                raise ValueError('no valid data key (\'images\' or \'features\') found from dataloader!')
            P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]
            n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]
            e1_gt, e2_gt = [_.cuda() for _ in inputs['es']]
            G1_gt, G2_gt = [_.cuda() for _ in inputs['Gs']]
            H1_gt, H2_gt = [_.cuda() for _ in inputs['Hs']]
            KG, KH = [_.cuda() for _ in inputs['Ks']]
            edge_src = [_.cuda() for _ in inputs['edge_src']]
            edge_tgt = [_.cuda() for _ in inputs['edge_tgt']]
            edge_feat1 = [_.cuda() for _ in inputs['edge_feat1']]
            edge_feat2 = [_.cuda() for _ in inputs['edge_feat2']]
            perm_mat = inputs['gt_perm_mat'].cuda()

            batch_num = data1.size(0)
            iter_num = iter_num + 1

            with torch.set_grad_enabled(False):
                s_pred, U_src, F_src, U_tgt, F_tgt, AA, BB = model(
                    data1, data2, P1_gt, P2_gt, G1_gt, G2_gt, H1_gt, H2_gt,
                    n1_gt, n2_gt, KG, KH, edge_src, edge_tgt,
                    edge_feat1, edge_feat2, perm_mat, inp_type)

                # quadratic-constraint refinement: alternate between the relaxed
                # update qc_opt and a hard assignment from the LAP solver
                lb = 0.1
                Xnew = lap_solver(s_pred, n1_gt, n2_gt)
                A_src = torch.bmm(G1_gt, H1_gt.transpose(1, 2))
                A_tgt = torch.bmm(G2_gt, H2_gt.transpose(1, 2))
                for miter in range(10):
                    X = qc_opt(A_src, A_tgt, s_pred, Xnew, lb)
                    Xnew = lap_solver(X, n1_gt, n2_gt)
                # Xnew is already a hard assignment from the LAP solver
                s_pred_perm = Xnew

            _, _acc_match_num, _acc_total_num = matching_accuracy(s_pred_perm, perm_mat, n1_gt)
            acc_match_num += _acc_match_num
            acc_total_num += _acc_total_num

            if iter_num % cfg.STATISTIC_STEP == 0 and verbose:
                running_speed = cfg.STATISTIC_STEP * batch_num / (time.time() - running_since)
                print('Class {:<8} Iteration {:<4} {:>4.2f}sample/s'.format(cls, iter_num, running_speed))
                running_since = time.time()

        accs[i] = acc_match_num / acc_total_num
        if verbose:
            print('Class {} acc = {:.4f}'.format(cls, accs[i]))

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    model.train(mode=was_training)
    ds.cls = cls_cache

    print('Matching accuracy')
    for cls, single_acc in zip(classes, accs):
        print('{} = {:.4f}'.format(cls, single_acc))
    print('average = {:.4f}'.format(torch.mean(accs)))

    return accs
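
# ---------------------------------------------------------------------------
# `lap_solver = hungarian` above projects a batch of soft matching scores onto
# hard permutation matrices, ignoring padded rows/columns beyond the true
# keypoint counts. A minimal per-sample sketch built on SciPy's
# linear_sum_assignment (an assumption about the repo's `hungarian`, which may
# pad and mask differently; `_sketch` name avoids shadowing the real import):
# ---------------------------------------------------------------------------
import torch
from scipy.optimize import linear_sum_assignment


def hungarian_sketch(s_pred, n1_gt, n2_gt):
    # s_pred: (b, n1_max, n2_max) soft matching scores
    perm = torch.zeros_like(s_pred)
    for b in range(s_pred.size(0)):
        n1, n2 = int(n1_gt[b]), int(n2_gt[b])
        score = s_pred[b, :n1, :n2].detach().cpu().numpy()
        row, col = linear_sum_assignment(-score)  # negate to maximize total score
        perm[b, torch.from_numpy(row), torch.from_numpy(col)] = 1.0
    return perm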
def eval_model(model, dataloader, eval_epoch=None, verbose=False, train_epoch=None):
    print('Start evaluation...')
    device = next(model.parameters()).device

    if eval_epoch is not None:
        model_path = str(Path(cfg.OUTPUT_PATH) / 'params' / 'params_{:04}.pt'.format(eval_epoch))
        print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

    score_thresh = 0.2
    if train_epoch is not None:
        # ramp the threshold up with training progress, capped at 0.5
        score_thresh = min(train_epoch * 0.1, 0.5)
    print('score_thresh = {}'.format(score_thresh))

    model.eval()
    lap_solver = hungarian

    running_since = time.time()
    iter_num = 0

    # thresholds swept during evaluation: 0.9 ... 0.1, then 0.010 ... 0.001
    score_th_list = [i / 10 for i in range(9, 0, -1)] + [i / 1000 for i in range(10, 0, -1)]

    acc_match_num = torch.zeros(len(score_th_list), device=device)
    acc_total_num = torch.zeros(len(score_th_list), device=device)
    acc_total_pred_num = torch.zeros(len(score_th_list), device=device)

    for inputs in dataloader:
        data1, data2 = [_.cuda() for _ in inputs['images']]
        P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]
        n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]
        perm_mat = inputs['gt_perm_mat'].cuda()
        weights = inputs['ws'].cuda()

        batch_num = data1.size(0)
        iter_num = iter_num + 1

        with torch.set_grad_enabled(False):
            (s_pred, pred, match_emb1, match_emb2, match_edgeemb1, match_edgeemb2,
             indices1, indices2, newn1_gt, newn2_gt) = model(
                data1, data2, P1_gt, P2_gt, n1_gt, n2_gt,
                train_stage=False, perm_mat=perm_mat, score_thresh=score_thresh)

        for idx, score_th in enumerate(score_th_list):
            s_pred_perm = lap_solver(s_pred, newn1_gt, newn2_gt, indices1, indices2,
                                     n1_gt, n2_gt, score_th=score_th)
            _, _acc_match_num, _acc_total_num, _acc_total_pred_num = matching_accuracy(
                s_pred_perm, perm_mat, n1_gt, n2_gt, weights)
            acc_match_num[idx] += _acc_match_num
            acc_total_num[idx] += _acc_total_num
            acc_total_pred_num[idx] += _acc_total_pred_num

        if iter_num % cfg.STATISTIC_STEP == 0 and verbose:
            running_speed = cfg.STATISTIC_STEP * batch_num / (time.time() - running_since)
            print('Iteration {:<4} {:>4.2f}sample/s'.format(iter_num, running_speed))
            running_since = time.time()

    recalls = acc_match_num / acc_total_num
    precisions = acc_match_num / acc_total_pred_num
    f1s = 2 * recalls * precisions / (precisions + recalls)

    print('score')
    print(score_th_list)
    print('recall')
    print(recalls.cpu().numpy().tolist())
    print('precision')
    print(precisions.cpu().numpy().tolist())
    print('F1')
    print(f1s.cpu().numpy().tolist())

    return None
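
# ---------------------------------------------------------------------------
# This variant of `matching_accuracy` also returns the number of predicted
# matches, which is what makes the precision/recall/F1 sweep above possible.
# A minimal counting sketch consistent with how it is called (the `weights`
# argument is accepted but ignored here; the repo's version may use it to
# weight individual correspondences):
# ---------------------------------------------------------------------------
import torch


def matching_accuracy_sketch(pmat_pred, pmat_gt, n1_gt, n2_gt, weights=None):
    # element-wise product of 0/1 matrices counts correctly predicted matches
    match_num = (pmat_pred * pmat_gt).sum()
    total_gt = pmat_gt.sum()      # number of ground-truth correspondences
    total_pred = pmat_pred.sum()  # number of predicted correspondences
    acc = match_num / total_gt.clamp(min=1)
    return acc, match_num, total_gt, total_pred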
def eval_model(model, dataloader, eval_epoch=None, verbose=False, train_epoch=None):
    print('Start evaluation...')
    since = time.time()
    device = next(model.parameters()).device

    if eval_epoch is not None:
        model_path = str(Path(cfg.OUTPUT_PATH) / 'params' / 'params_{:04}.pt'.format(eval_epoch))
        print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

    score_thresh = 0.5
    if train_epoch is not None:
        score_thresh = min(train_epoch * 0.1, 0.5)
    print('score_thresh = {}'.format(score_thresh))

    was_training = model.training
    model.eval()
    lap_solver = hungarian

    running_since = time.time()
    iter_num = 0

    acc_match_num = torch.zeros(1, device=device)
    acc_total_num = torch.zeros(1, device=device)
    acc_total_pred_num = torch.zeros(1, device=device)

    for inputs in dataloader:
        data1, data2 = [_.cuda() for _ in inputs['images']]
        P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]
        n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]
        perm_mat = inputs['gt_perm_mat'].cuda()

        batch_num = data1.size(0)
        iter_num = iter_num + 1

        with torch.set_grad_enabled(False):
            s_pred, indices1, indices2, newn1_gt, newn2_gt = model(
                data1, data2, P1_gt, P2_gt, n1_gt, n2_gt,
                train_stage=False, perm_mat=perm_mat, score_thresh=score_thresh)

        s_pred_perm = lap_solver(s_pred, newn1_gt, newn2_gt, indices1, indices2, n1_gt, n2_gt)

        _acc_match_num, _acc_total_num, _acc_total_pred_num = matching_accuracy(
            s_pred_perm, perm_mat, n1_gt, n2_gt)
        acc_match_num += _acc_match_num
        acc_total_num += _acc_total_num
        acc_total_pred_num += _acc_total_pred_num

        if iter_num % cfg.STATISTIC_STEP == 0 and verbose:
            running_speed = cfg.STATISTIC_STEP * batch_num / (time.time() - running_since)
            print('Iteration {:<4} {:>4.2f}sample/s'.format(iter_num, running_speed))
            running_since = time.time()

    recalls = acc_match_num / acc_total_num
    precisions = acc_match_num / acc_total_pred_num

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    model.train(mode=was_training)

    print('Matching accuracy')
    print('recall = {:.4f}'.format(recalls.item()))
    print('precision = {:.4f}'.format(precisions.item()))

    return recalls
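
# ---------------------------------------------------------------------------
# With train_stage=False and a score_thresh, the model above returns filtered
# keypoint indices (`indices1`, `indices2`) and reduced counts (`newn1_gt`,
# `newn2_gt`) so the LAP solver can map its result back to the original
# keypoint set. A hypothetical per-graph sketch of that filtering step (the
# actual logic lives inside the model and may differ):
# ---------------------------------------------------------------------------
import torch


def filter_by_score_sketch(scores, n_gt, score_thresh):
    # scores: (n_max,) predicted keypoint confidences for one graph
    keep = torch.nonzero(scores[:int(n_gt)] >= score_thresh, as_tuple=False).squeeze(1)
    return keep, keep.numel()  # surviving indices, reduced keypoint count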
def train_eval_model(model,
                     criterion,
                     optimizer,
                     dataloader,
                     tfboard_writer,
                     num_epochs=25,
                     resume=False,
                     start_epoch=0):
    print('Start training...')
    since = time.time()
    dataset_size = len(dataloader['train'].dataset)
    displacement = Displacement()
    lap_solver = hungarian

    device = next(model.parameters()).device
    print('model on device: {}'.format(device))

    checkpoint_path = Path(cfg.OUTPUT_PATH) / 'params'
    if not checkpoint_path.exists():
        checkpoint_path.mkdir(parents=True)

    if resume:
        assert start_epoch != 0
        model_path = str(checkpoint_path / 'params_{:04}.pt'.format(start_epoch))
        print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

        optim_path = str(checkpoint_path / 'optim_{:04}.pt'.format(start_epoch))
        print('Loading optimizer state from {}'.format(optim_path))
        optimizer.load_state_dict(torch.load(optim_path))

    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=cfg.TRAIN.LR_STEP,
                                               gamma=cfg.TRAIN.LR_DECAY,
                                               last_epoch=cfg.TRAIN.START_EPOCH - 1)

    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()  # Set model to training mode

        print('lr = ' + ', '.join(['{:.2e}'.format(x['lr']) for x in optimizer.param_groups]))

        epoch_loss = 0.0
        running_loss = 0.0
        running_since = time.time()
        iter_num = 0

        # Iterate over data.
        for inputs in dataloader['train']:
            if 'images' in inputs:
                data1, data2 = [_.cuda() for _ in inputs['images']]
                inp_type = 'img'
            elif 'features' in inputs:
                data1, data2 = [_.cuda() for _ in inputs['features']]
                inp_type = 'feat'
            else:
                raise ValueError('no valid data key (\'images\' or \'features\') found from dataloader!')
            P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]
            n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]
            if 'es' in inputs:
                e1_gt, e2_gt = [_.cuda() for _ in inputs['es']]
                G1_gt, G2_gt = [_.cuda() for _ in inputs['Gs']]
                H1_gt, H2_gt = [_.cuda() for _ in inputs['Hs']]
                KG, KH = [_.cuda() for _ in inputs['Ks']]
            perm_mat = inputs['gt_perm_mat'].cuda()

            iter_num = iter_num + 1

            # zero the parameter gradients
            optimizer.zero_grad()

            with torch.set_grad_enabled(True):
                # forward
                if 'es' in inputs:
                    s_pred, d_pred = model(data1, data2, P1_gt, P2_gt, G1_gt, G2_gt, H1_gt, H2_gt,
                                           n1_gt, n2_gt, KG, KH, inp_type)
                else:
                    s_pred, d_pred = model(data1, data2, P1_gt, P2_gt, n1_gt, n2_gt)

                multi_loss = []
                if cfg.TRAIN.LOSS_FUNC == 'offset':
                    d_gt, grad_mask = displacement(perm_mat, P1_gt, P2_gt, n1_gt)
                    loss = criterion(d_pred, d_gt, grad_mask)
                elif cfg.TRAIN.LOSS_FUNC == 'perm':
                    loss = criterion(s_pred, perm_mat, n1_gt, n2_gt)
                else:
                    raise ValueError('Unknown loss function {}'.format(cfg.TRAIN.LOSS_FUNC))

                # backward + optimize
                loss.backward()
                optimizer.step()

                if cfg.MODULE == 'NGM.hypermodel':
                    tfboard_writer.add_scalars(
                        'weight',
                        {'w2': model.module.weight2, 'w3': model.module.weight3},
                        epoch * cfg.TRAIN.EPOCH_ITERS + iter_num
                    )

                # training accuracy statistic
                acc, _, __ = matching_accuracy(lap_solver(s_pred, n1_gt, n2_gt), perm_mat, n1_gt)

                # tfboard writer
                loss_dict = {'loss_{}'.format(i): l.item() for i, l in enumerate(multi_loss)}
                loss_dict['loss'] = loss.item()
                tfboard_writer.add_scalars('loss', loss_dict, epoch * cfg.TRAIN.EPOCH_ITERS + iter_num)

                accdict = dict()
                accdict['matching accuracy'] = acc
                tfboard_writer.add_scalars(
                    'training accuracy',
                    accdict,
                    epoch * cfg.TRAIN.EPOCH_ITERS + iter_num
                )

                # statistics
                running_loss += loss.item() * perm_mat.size(0)
                epoch_loss += loss.item() * perm_mat.size(0)

                if iter_num % cfg.STATISTIC_STEP == 0:
                    running_speed = cfg.STATISTIC_STEP * perm_mat.size(0) / (time.time() - running_since)
                    print('Epoch {:<4} Iteration {:<4} {:>4.2f}sample/s Loss={:<8.4f}'
                          .format(epoch, iter_num, running_speed,
                                  running_loss / cfg.STATISTIC_STEP / perm_mat.size(0)))
                    tfboard_writer.add_scalars(
                        'speed',
                        {'speed': running_speed},
                        epoch * cfg.TRAIN.EPOCH_ITERS + iter_num
                    )
                    running_loss = 0.0
                    running_since = time.time()

        epoch_loss = epoch_loss / dataset_size

        save_model(model, str(checkpoint_path / 'params_{:04}.pt'.format(epoch + 1)))
        torch.save(optimizer.state_dict(), str(checkpoint_path / 'optim_{:04}.pt'.format(epoch + 1)))

        print('Epoch {:<4} Loss: {:.4f}'.format(epoch, epoch_loss))
        print()

        # Eval in each epoch
        accs = eval_model(model, dataloader['test'])
        acc_dict = {"{}".format(cls): single_acc
                    for cls, single_acc in zip(dataloader['train'].dataset.classes, accs)}
        acc_dict['average'] = torch.mean(accs)
        tfboard_writer.add_scalars(
            'Eval acc',
            acc_dict,
            (epoch + 1) * cfg.TRAIN.EPOCH_ITERS
        )

        scheduler.step()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'
          .format(time_elapsed // 3600, (time_elapsed // 60) % 60, time_elapsed % 60))

    return model
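
# ---------------------------------------------------------------------------
# The 'offset' loss branch above calls `displacement(perm_mat, P1_gt, P2_gt,
# n1_gt)` to build a regression target. A minimal sketch of what the
# `Displacement` module is assumed to compute: for every source keypoint, the
# displacement to its ground-truth counterpart, plus a mask selecting rows
# that actually have a match (an assumption; the repo's module may differ):
# ---------------------------------------------------------------------------
import torch


class DisplacementSketch(torch.nn.Module):
    def forward(self, perm_mat, P1, P2, n1_gt=None):
        # matched target coordinates for each source keypoint: (b, n1, 2)
        P2_matched = torch.bmm(perm_mat, P2)
        d_gt = P2_matched - P1
        # only rows with a ground-truth match should contribute gradients
        grad_mask = (perm_mat.sum(dim=-1, keepdim=True) > 0).float()
        return d_gt, grad_mask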
def eval_model(model,
               dataloader,
               eval_epoch=None,
               metric_is_save=False,
               estimate_iters=1,
               viz=None,
               usepgm=True,
               userefine=False,
               save_filetime='time'):
    print('-----------------Start evaluation-----------------')
    lap_solver = hungarian
    permevalLoss = PermLoss()
    since = time.time()
    all_val_metrics_np = defaultdict(list)
    iter_num = 0

    dataset_size = len(dataloader.dataset)
    print('eval datasize: {}'.format(dataset_size))
    device = next(model.parameters()).device
    print('model on device: {}'.format(device))

    if eval_epoch is not None:
        if eval_epoch == -1:
            model_path = str(Path(cfg.OUTPUT_PATH) / 'params' / 'params_best.pt')
            print('Loading best model parameters')
        else:
            model_path = str(Path(cfg.OUTPUT_PATH) / 'params' / 'params_{:04}.pt'.format(eval_epoch))
            print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

    was_training = model.training
    model.eval()

    running_since = time.time()

    for inputs in dataloader:
        P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]
        n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]
        A1_gt, A2_gt = [_.cuda() for _ in inputs['As']]
        perm_mat = inputs['gt_perm_mat'].cuda()
        T1_gt, T2_gt = [_.cuda() for _ in inputs['Ts']]
        Inlier_src_gt, Inlier_ref_gt = [_.cuda() for _ in inputs['Ins']]
        Label = torch.tensor([_ for _ in inputs['label']])

        batch_cur_size = perm_mat.size(0)
        iter_num = iter_num + 1

        infer_time = time.time()
        with torch.set_grad_enabled(False):
            if cfg.EVAL.ITERATION:
                P1_gt_copy = P1_gt.clone()
                P2_gt_copy = P2_gt.clone()
                P1_gt_copy_inv = P1_gt.clone()
                P2_gt_copy_inv = P2_gt.clone()
                s_perm_mat = caliters_perm(model, P1_gt_copy, P2_gt_copy,
                                           A1_gt, A2_gt, n1_gt, n2_gt, estimate_iters)
                if cfg.EVAL.CYCLE:
                    s_perm_mat_inv = caliters_perm(model, P2_gt_copy_inv, P1_gt_copy_inv,
                                                   A2_gt, A1_gt, n2_gt, n1_gt, estimate_iters)
                    # keep only cycle-consistent matches
                    s_perm_mat = s_perm_mat * s_perm_mat_inv.permute(0, 2, 1)
                permevalloss = torch.tensor([0])
            else:
                s_pred_tensor, Inlier_src_pre, Inlier_ref_pre_tensor = model(
                    P1_gt, P2_gt, A1_gt, A2_gt, n1_gt, n2_gt)
                if cfg.EVAL.CYCLE:
                    s_pred_tensor_inv, Inlier_src_pre_inv, Inlier_ref_pre_tensor_inv = model(
                        P2_gt, P1_gt, A2_gt, A1_gt, n2_gt, n1_gt)
                if cfg.PGM.USEINLIERRATE:
                    s_pred_tensor = Inlier_src_pre * s_pred_tensor * \
                                    Inlier_ref_pre_tensor.transpose(2, 1).contiguous()
                    if cfg.EVAL.CYCLE:
                        s_pred_tensor_inv = Inlier_src_pre_inv * s_pred_tensor_inv * \
                                            Inlier_ref_pre_tensor_inv.transpose(2, 1).contiguous()
                permevalloss = permevalLoss(s_pred_tensor, perm_mat, n1_gt, n2_gt)
                s_perm_mat = lap_solver(s_pred_tensor, n1_gt, n2_gt,
                                        Inlier_src_pre, Inlier_ref_pre_tensor)
                if cfg.EVAL.CYCLE:
                    s_perm_mat_inv = lap_solver(s_pred_tensor_inv, n2_gt, n1_gt,
                                                Inlier_src_pre_inv, Inlier_ref_pre_tensor_inv)
                    s_perm_mat = s_perm_mat * s_perm_mat_inv.permute(0, 2, 1)

            # include transform estimation in the timed inference section
            compute_transform(s_perm_mat, P1_gt[:, :, :3], P2_gt[:, :, :3],
                              T1_gt[:, :3, :3], T1_gt[:, :3, 3])
        infer_time = time.time() - infer_time

        match_metrics = matching_accuracy(s_perm_mat, perm_mat, n1_gt)
        perform_metrics = compute_metrics(s_perm_mat, P1_gt[:, :, :3], P2_gt[:, :, :3],
                                          T1_gt[:, :3, :3], T1_gt[:, :3, 3],
                                          viz=viz, usepgm=usepgm, userefine=userefine)

        for k in match_metrics:
            all_val_metrics_np[k].append(match_metrics[k])
        for k in perform_metrics:
            all_val_metrics_np[k].append(perform_metrics[k])
        all_val_metrics_np['label'].append(Label)
        all_val_metrics_np['loss'].append(np.repeat(permevalloss.item(), batch_cur_size))
        all_val_metrics_np['infertime'].append(np.repeat(infer_time / batch_cur_size, batch_cur_size))

        if iter_num % cfg.STATISTIC_STEP == 0 and metric_is_save:
            running_speed = cfg.STATISTIC_STEP * batch_cur_size / (time.time() - running_since)
            print('Iteration {:<4} {:>4.2f}sample/s'.format(iter_num, running_speed))
            running_since = time.time()

    all_val_metrics_np = {k: np.concatenate(all_val_metrics_np[k]) for k in all_val_metrics_np}
    summary_metrics = summarize_metrics(all_val_metrics_np)
    print('Mean-Loss: {:.4f} GT-Acc:{:.4f} Pred-Acc:{:.4f}'.format(
        summary_metrics['loss'], summary_metrics['acc_gt'], summary_metrics['acc_pred']))
    print_metrics(summary_metrics)

    if metric_is_save:
        np.save(str(Path(cfg.OUTPUT_PATH) / ('eval_log_' + save_filetime + '_metric')),
                all_val_metrics_np)

    time_elapsed = time.time() - since
    print('Evaluation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    model.train(mode=was_training)

    return summary_metrics
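
# ---------------------------------------------------------------------------
# `summarize_metrics` above reduces the per-sample metric arrays collected in
# `all_val_metrics_np` to scalars such as 'loss', 'acc_gt', and 'acc_pred'.
# A minimal sketch assuming a simple mean per key (the repo's version may also
# report medians or other aggregates, and may treat 'label' specially):
# ---------------------------------------------------------------------------
import numpy as np


def summarize_metrics_sketch(all_metrics):
    # reduce each per-sample metric array to its mean
    return {k: np.mean(v) for k, v in all_metrics.items()}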
def train_eval_model(model,
                     permLoss,
                     optimizer,
                     dataloader,
                     num_epochs=25,
                     resume=False,
                     start_epoch=0,
                     viz=None,
                     savefiletime='time'):
    print('**************************************')
    print('Start training...')
    dataset_size = len(dataloader['train'].dataset)
    print('train datasize: {}'.format(dataset_size))

    since = time.time()
    lap_solver = hungarian
    optimal_acc = 0.0
    optimal_rot = np.inf

    device = next(model.parameters()).device
    print('model on device: {}'.format(device))

    checkpoint_path = Path(cfg.OUTPUT_PATH) / 'params'
    if not checkpoint_path.exists():
        checkpoint_path.mkdir(parents=True)

    if resume:
        assert start_epoch != 0
        model_path = str(checkpoint_path / 'params_{:04}.pt'.format(start_epoch))
        print('Loading model parameters from {}'.format(model_path))
        load_model(model, model_path)

        optim_path = str(checkpoint_path / 'optim_{:04}.pt'.format(start_epoch))
        print('Loading optimizer state from {}'.format(optim_path))
        optimizer.load_state_dict(torch.load(optim_path))

    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=cfg.TRAIN.LR_STEP,
                                               gamma=cfg.TRAIN.LR_DECAY,
                                               last_epoch=cfg.TRAIN.START_EPOCH - 1)

    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()  # Set model to training mode

        print('lr = ' + ', '.join(['{:.2e}'.format(x['lr']) for x in optimizer.param_groups]))

        iter_num = 0
        running_since = time.time()
        all_train_metrics_np = defaultdict(list)

        # Iterate over 3D data.
        for inputs in dataloader['train']:
            P1_gt, P2_gt = [_.cuda() for _ in inputs['Ps']]    # keypoint coordinates
            n1_gt, n2_gt = [_.cuda() for _ in inputs['ns']]    # keypoint counts
            A1_gt, A2_gt = [_.cuda() for _ in inputs['As']]    # edge connectivity matrices
            perm_mat = inputs['gt_perm_mat'].cuda()            # ground-truth permutation matrix
            T1_gt, T2_gt = [_.cuda() for _ in inputs['Ts']]
            Inlier_src_gt, Inlier_ref_gt = [_.cuda() for _ in inputs['Ins']]

            batch_cur_size = perm_mat.size(0)
            iter_num = iter_num + 1

            # zero the parameter gradients
            optimizer.zero_grad()

            with torch.set_grad_enabled(True):
                # forward
                s_pred, Inlier_src_pre, Inlier_ref_pre = model(
                    P1_gt, P2_gt, A1_gt, A2_gt, n1_gt, n2_gt)

                # on noisy data, optionally reweight scores by predicted inlier rates
                if cfg.DATASET.NOISE_TYPE != 'clean' and cfg.PGM.USEINLIERRATE:
                    s_pred = Inlier_src_pre * s_pred * Inlier_ref_pre.transpose(2, 1).contiguous()
                permloss = permLoss(s_pred, perm_mat, n1_gt, n2_gt)
                loss = permloss

                # backward + optimize
                loss.backward()
                optimizer.step()

                # training accuracy statistics
                s_perm_mat = lap_solver(s_pred, n1_gt, n2_gt, Inlier_src_pre, Inlier_ref_pre)
                match_metrics = matching_accuracy(s_perm_mat, perm_mat, n1_gt)
                perform_metrics = compute_metrics(s_perm_mat, P1_gt[:, :, :3], P2_gt[:, :, :3],
                                                  T1_gt[:, :3, :3], T1_gt[:, :3, 3])

                for k in match_metrics:
                    all_train_metrics_np[k].append(match_metrics[k])
                for k in perform_metrics:
                    all_train_metrics_np[k].append(perform_metrics[k])
                # replicate per sample so epoch averaging is weighted by batch size
                all_train_metrics_np['loss'].append(np.repeat(loss.item(), batch_cur_size))

                if iter_num % cfg.STATISTIC_STEP == 0:
                    running_speed = cfg.STATISTIC_STEP * batch_cur_size / (time.time() - running_since)
                    print('Epoch {:<4} Iteration {:<4} {:>4.2f}sample/s Loss={:<8.4f} GT-Acc:{:.4f} Pred-Acc:{:.4f}'
                          .format(epoch, iter_num, running_speed,
                                  np.mean(np.concatenate(all_train_metrics_np['loss'])
                                          [-cfg.STATISTIC_STEP * batch_cur_size:]),
                                  np.mean(np.concatenate(all_train_metrics_np['acc_gt'])
                                          [-cfg.STATISTIC_STEP * batch_cur_size:]),
                                  np.mean(np.concatenate(all_train_metrics_np['acc_pred'])
                                          [-cfg.STATISTIC_STEP * batch_cur_size:])))
                    running_since = time.time()

        all_train_metrics_np = {k: np.concatenate(all_train_metrics_np[k])
                                for k in all_train_metrics_np}
        summary_metrics = summarize_metrics(all_train_metrics_np)
        print('Epoch {:<4} Mean-Loss: {:.4f} GT-Acc:{:.4f} Pred-Acc:{:.4f}'.format(
            epoch, summary_metrics['loss'], summary_metrics['acc_gt'], summary_metrics['acc_pred']))
        print_metrics(summary_metrics)

        save_model(model, str(checkpoint_path / 'params_{:04}.pt'.format(epoch + 1)))
        torch.save(optimizer.state_dict(), str(checkpoint_path / 'optim_{:04}.pt'.format(epoch + 1)))

        # optionally dump per-sample training metrics to disk
        metric_is_save = False
        if metric_is_save:
            np.save(str(Path(cfg.OUTPUT_PATH) / ('train_log_' + savefiletime + '_metric')),
                    all_train_metrics_np)

        if viz is not None:
            viz.update('train_loss', epoch, {'loss': summary_metrics['loss']})
            viz.update('train_acc', epoch, {'acc': summary_metrics['acc_gt']})
            viz.update('train_metric', epoch, {'r_mae': summary_metrics['r_mae'],
                                               't_mae': summary_metrics['t_mae']})

        # Eval in each epoch
        val_metrics = eval_model(model, dataloader['val'])
        if viz is not None:
            viz.update('val_acc', epoch, {'acc': val_metrics['acc_gt']})
            viz.update('val_metric', epoch, {'r_mae': val_metrics['r_mae'],
                                             't_mae': val_metrics['t_mae']})
        if optimal_acc < val_metrics['acc_gt']:
            optimal_acc = val_metrics['acc_gt']
            print('Current best acc model is {}'.format(epoch + 1))
        if optimal_rot > val_metrics['r_mae']:
            optimal_rot = val_metrics['r_mae']
            print('Current best rotation model is {}'.format(epoch + 1))

        # Test in each epoch
        test_metrics = eval_model(model, dataloader['test'])
        if viz is not None:
            viz.update('test_acc', epoch, {'acc': test_metrics['acc_gt']})
            viz.update('test_metric', epoch, {'r_mae': test_metrics['r_mae'],
                                              't_mae': test_metrics['t_mae']})

        scheduler.step()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed // 60) % 60, time_elapsed % 60))

    return model
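
# ---------------------------------------------------------------------------
# `compute_metrics`/`compute_transform` above score a rigid transform estimated
# from the predicted correspondences against the ground truth (r_mae, t_mae).
# A minimal single-pair Kabsch/SVD sketch of that estimation step, under the
# assumption of one-to-one correspondences (an illustration, not the repo's
# implementation):
# ---------------------------------------------------------------------------
import torch


def kabsch_sketch(P_src, P_tgt):
    # P_src, P_tgt: (n, 3) corresponding points; returns R (3, 3), t (3,)
    # minimizing || R @ P_src + t - P_tgt ||
    mu_src, mu_tgt = P_src.mean(dim=0), P_tgt.mean(dim=0)
    H = (P_src - mu_src).t() @ (P_tgt - mu_tgt)   # 3x3 cross-covariance
    U, S, Vt = torch.linalg.svd(H)
    d = torch.det(Vt.t() @ U.t())                 # guard against reflections
    D = torch.diag(torch.stack([torch.ones_like(d), torch.ones_like(d), d]))
    R = Vt.t() @ D @ U.t()
    t = mu_tgt - R @ mu_src
    return R, t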