def dist(self, X, Y, *args, **kwargs):
    # X, Y: batches [N, C, H, W]
    N = X.shape[0]
    d1 = torch.clamp_min(1 - (self.ssim_d(X, Y)).view(N, -1).mean(dim=1), 0.0)
    return d1
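# Usage sketch (illustrative, not this repository's actual wiring): `ssim_d` is
# assumed to be a module returning a per-pixel SSIM map in [-1, 1], e.g. an SSIM
# implementation from kornia or piqa. With identical batches the distance is ~0
# (SSIM == 1); perturbing Y increases it, and clamp_min keeps it non-negative.
X = torch.rand(4, 3, 64, 64)
Y = (X + 0.1 * torch.randn_like(X)).clamp(0, 1)
# d_same = obj.dist(X, X)   # ~0
# d_noisy = obj.dist(X, Y)  # > 0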
def train(gpu, ngpus_per_node, args):
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=ngpus_per_node, rank=args.gpu)

    model = EppFlowNet(args=args)
    if args.distributed:
        torch.cuda.set_device(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(module=model)
        model = model.to(f'cuda:{args.gpu}')
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu],
                                                          find_unused_parameters=True,
                                                          output_device=args.gpu)
    else:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logroot = os.path.join(args.logroot, args.name)
    print("Parameter Count: %d, saving location: %s" % (count_parameters(model), logroot))

    if args.restore_ckpt is not None:
        print("=> loading checkpoint '{}'".format(args.restore_ckpt))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.restore_ckpt, map_location=loc)
        model.load_state_dict(checkpoint, strict=False)

    with open(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                           'eppflownet/pose_bin{}.pickle'.format(str(int(32 / args.num_angs)))), 'rb') as f:
        linlogdedge = pickle.load(f)
    minidx = np.argmin(np.abs(linlogdedge))
    print("Min index is: %d, val: %f" % (minidx, linlogdedge[minidx]))

    model.train()

    train_entries, evaluation_entries, seqmap = read_splits(ngpus_per_node)

    # np.int was removed in NumPy 1.24; a plain int() conversion is equivalent here
    interval = int(np.floor(len(evaluation_entries) / ngpus_per_node))
    if args.gpu == ngpus_per_node - 1:
        stidx = int(interval * args.gpu)
        edidx = len(evaluation_entries)
    else:
        stidx = int(interval * args.gpu)
        edidx = int(interval * (args.gpu + 1))
    print("GPU %d, eval from %d to %d, in total %d" % (gpu, stidx, edidx, edidx - stidx))

    train_dataset = KITTI_eigen(root=args.dataset_root, inheight=args.inheight, inwidth=args.inwidth,
                                entries=train_entries, maxinsnum=args.maxinsnum, linlogdedge=linlogdedge,
                                num_samples=args.num_angs, depthvls_root=args.depthvlsgt_root,
                                prediction_root=args.prediction_root, ins_root=args.ins_root,
                                mdPred_root=args.mdPred_root, RANSACPose_root=args.RANSACPose_root,
                                istrain=True, muteaug=False, banremovedup=True, isgarg=False)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, pin_memory=True,
                                   num_workers=int(args.num_workers / ngpus_per_node),
                                   drop_last=True, sampler=train_sampler)

    eval_dataset = KITTI_odom(root=args.dataset_root, inheight=args.evalheight, inwidth=args.evalwidth,
                              entries=evaluation_entries[stidx:edidx], maxinsnum=args.maxinsnum,
                              linlogdedge=linlogdedge, num_samples=args.num_angs,
                              depthvls_root=args.depthvlsgt_root, prediction_root=args.prediction_root,
                              ins_root=args.ins_root, mdPred_root=args.mdPred_root,
                              RANSACPose_root=args.RANSACPose_root, istrain=False, isgarg=True)
    eval_loader = data.DataLoader(eval_dataset, batch_size=2, pin_memory=True,
                                  num_workers=3, drop_last=False)

    print("Training splits contain %d images while test splits contain %d images"
          % (train_dataset.__len__(), eval_dataset.__len__()))

    if args.distributed:
        group = dist.new_group([i for i in range(ngpus_per_node)])

    optimizer, scheduler = fetch_optimizer(args, model, int(train_dataset.__len__() / 2))
    total_steps = 0

    if args.gpu == 0:
        logger = Logger(logroot)
        logger_evaluation = Logger(os.path.join(args.logroot, 'evaluation_eigen_background', args.name))
        logger_evaluation_org = Logger(os.path.join(args.logroot, 'evaluation_eigen_background',
                                                    "{}_org".format(args.name)))
        logger.create_summarywriter()
        logger_evaluation.create_summarywriter()
        logger_evaluation_org.create_summarywriter()

    VAL_FREQ = 5000
    epoch = 0
    minabsl = 1e10

    ssim = SSIM()

    st = time.time()
    should_keep_training = True
    while should_keep_training:
        train_sampler.set_epoch(epoch)
        for i_batch, data_blob in enumerate(train_loader):
            optimizer.zero_grad()

            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
            ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
            scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
            mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)
            rel_pose = data_blob['rel_pose'].cuda(gpu)

            posepred = posepred[:, :, 0]
            ang_decps_pad = ang_decps_pad[:, :, 0]
            scl_decps_pad = scl_decps_pad[:, :, 0]
            mvd_decps_pad = mvd_decps_pad[:, :, 0]

            # IMUlocations1 = data_blob['IMUlocations1'].cuda(gpu)
            # leftarrs1 = data_blob['leftarrs1'].cuda(gpu)
            # rightarrs1 = data_blob['rightarrs1'].cuda(gpu)
            # IMUlocations2 = data_blob['IMUlocations2'].cuda(gpu)
            # leftarrs2 = data_blob['leftarrs2'].cuda(gpu)
            # rightarrs2 = data_blob['rightarrs2'].cuda(gpu)

            gpsscale = torch.sqrt(torch.sum(rel_pose[:, 0:3, 3] ** 2, dim=1))
            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            # tensor2disp(1/mD_pred_clipped, vmax=0.15, viewind=0).show()

            outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred,
                            ang_decps_pad, scl_decps_pad, mvd_decps_pad, insmap)

            rpjloss_cale, rpjloss_fin = get_reprojection_loss(image1, outputs, ssim, args)
            scaleloss = get_scale_loss(gpsscale=gpsscale, outputs=outputs, num_angs=args.num_angs)

            seqloss = 0
            if args.enable_seqloss:
                loss = (rpjloss_cale + rpjloss_fin) / 2 + seqloss
            elif args.enable_scalelossonly:
                loss = (rpjloss_cale + rpjloss_fin) / 2 * 0 + scaleloss
            else:
                loss = (rpjloss_cale + rpjloss_fin) / 2 * 0.1 + scaleloss

            metrics = dict()
            metrics['rpjloss_cale'] = rpjloss_cale.item()
            metrics['rpjloss_fin'] = rpjloss_fin.item()
            metrics['scaleloss'] = scaleloss
            metrics['loss'] = loss

            if torch.sum(torch.isnan(loss)) > 0:
                print(data_blob['tag'])

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            scheduler.step()

            # if args.gpu == 0:
            #     print(i_batch, loss.item(), scaleloss, torch.mean(image1))

            if args.gpu == 0:
                logger.write_dict(metrics, step=total_steps)
                if total_steps % SUM_FREQ == 0:
                    dr = time.time() - st
                    resths = (args.num_steps - total_steps) * dr / (total_steps + 1) / 60 / 60
                    print("Step: %d, rest hour: %f, depthloss: %f" % (total_steps, resths, loss.item()))
                    logger.write_vls(data_blob, outputs, total_steps)

            if total_steps % VAL_FREQ == 1:
                # all ranks enter: validate_kitti performs an all_reduce over `group`
                results = validate_kitti(model.module, args, eval_loader, group, seqmap)

                if args.gpu == 0:
                    logger_evaluation.write_dict(results, total_steps)
                    if minabsl > results['absl']:
                        minabsl = results['absl']
                        PATH = os.path.join(logroot, 'minabsl.pth')
                        torch.save(model.state_dict(), PATH)
                        print("model saved to %s" % PATH)

                # if args.gpu == 0:
                #     results = validate_kitti(model.module, args, eval_loader, None, group, total_steps, isorg=True)
                #     logger_evaluation_org.write_dict(results, total_steps)
                # else:
                #     validate_kitti(model.module, args, eval_loader, None, group, None, isorg=True)

                model.train()

            total_steps += 1
            if total_steps > args.num_steps:
                should_keep_training = False
                break

        if args.gpu == 0:
            PATH = os.path.join(logroot, 'epoch_{}.pth'.format(str(epoch).zfill(3)))
            torch.save(model.state_dict(), PATH)
            print("model saved to %s" % PATH)

        epoch = epoch + 1

    if args.gpu == 0:
        logger.close()
        PATH = os.path.join(logroot, 'final.pth')
        torch.save(model.state_dict(), PATH)

    return
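# Typical launch for a per-GPU entry point with this (gpu, ngpus_per_node, args)
# signature -- a sketch; `parse_args` is a hypothetical stand-in for however the
# repository actually builds `args`:
if __name__ == '__main__':
    args = parse_args()  # hypothetical argument parser
    ngpus_per_node = torch.cuda.device_count()
    if args.distributed:
        # mp.spawn calls train(rank, ngpus_per_node, args) once per GPU
        torch.multiprocessing.spawn(train, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        train(0, ngpus_per_node, args)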
def validate_kitti(model, args, eval_loader, logger, group, total_steps, isdeepv2d=False):
    """ Perform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu
    eval_measures_depth = torch.zeros(10).cuda(device=gpu)

    err_rec = list()
    err_rec_deepv2d = list()
    err_rec_md = list()
    mv_rec = list()

    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        depthgt = data_blob['depthmap'].cuda(gpu)

        rel_pose = data_blob['rel_pose'][0].cpu().numpy()
        gps_scale = np.sqrt(np.sum(rel_pose[0:3, 3] ** 2))

        if not args.initbymD:
            mD_pred = data_blob['depthpred'].cuda(gpu)
        else:
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

        if not isdeepv2d:
            outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred, insmap)
            predread = outputs[('depth', 2)]
        else:
            depthpred_deepv2d = data_blob['depthpred_deepv2d'].cuda(gpu)
            predread = depthpred_deepv2d
            # predread = data_blob['mdDepth_pred'].cuda(gpu)

        selector = ((depthgt > 0) * (predread > 0) * (depthgt > args.min_depth_eval)
                    * (depthgt < args.max_depth_eval)).float()
        predread = torch.clamp(predread, min=args.min_depth_eval, max=args.max_depth_eval)

        depth_gt_flatten = depthgt[selector == 1].cpu().numpy()
        pred_depth_flatten = predread[selector == 1].cpu().numpy()
        deepv2d_depth_flatten = data_blob['depthpred_deepv2d'][selector == 1].cpu().numpy()
        mD_pred_clipped_flatten = mD_pred[selector == 1].cpu().numpy()

        eval_measures_depth_np = compute_errors(gt=depth_gt_flatten, pred=pred_depth_flatten)
        eval_measures_depth_deepv2d_np = compute_errors(gt=depth_gt_flatten, pred=deepv2d_depth_flatten)
        eval_measures_depth_md_np = compute_errors(gt=depth_gt_flatten, pred=mD_pred_clipped_flatten)

        err_rec.append(eval_measures_depth_np[-3])
        mv_rec.append(gps_scale)
        err_rec_deepv2d.append(eval_measures_depth_deepv2d_np[-3])
        err_rec_md.append(eval_measures_depth_md_np[-3])

    err_rec = np.array(err_rec)
    mv_rec = np.array(mv_rec)
    err_rec_deepv2d = np.array(err_rec_deepv2d)
    err_rec_md = np.array(err_rec_md)

    check_dist = np.linspace(0, 3, 200)
    dist = 0.4
    dist_ratio = 0.1
    plot_mv = list()
    plot_err = list()
    plot_std = list()
    plot_num = list()
    for d in check_dist:
        d_low = d * (1 - dist)
        d_hig = d * (1 + dist)
        selector = (mv_rec >= d_low) * (mv_rec <= d_hig)

        d_low = d * (1 - dist_ratio)
        d_hig = d * (1 + dist_ratio)
        selector_ratio = (mv_rec >= d_low) * (mv_rec <= d_hig)

        if np.sum(selector) < 5:
            continue
        else:
            err1 = np.mean(err_rec[selector])
            err2 = np.mean(err_rec_deepv2d[selector])
            err3 = np.mean(err_rec_md[selector])
            std1 = np.std(err_rec[selector])
            std2 = np.std(err_rec_deepv2d[selector])
            std3 = np.std(err_rec_md[selector])
            plot_err.append(np.array([err1, err2, err3]))
            plot_std.append(np.array([std1, std2, std3]))
            plot_mv.append(d)
            plot_num.append(np.sum(selector_ratio))

    plot_err = np.stack(plot_err, axis=0)
    plot_mv = np.array(plot_mv)
    plot_std = np.stack(plot_std, axis=0)
    plot_num = np.stack(plot_num, axis=0)
    plot_num = plot_num / np.sum(plot_num)

    thickness = 0.1
    fig, ax = plt.subplots()
    plt.plot(plot_mv, plot_err[:, 0])
    plt.plot(plot_mv, plot_err[:, 1])
    plt.plot(plot_mv, plot_err[:, 2])
    ax.fill_between(plot_mv, plot_err[:, 0] - plot_std[:, 0] * thickness,
                    plot_err[:, 0] + plot_std[:, 0] * thickness, alpha=0.5)
    ax.fill_between(plot_mv, plot_err[:, 1] - plot_std[:, 1] * thickness,
                    plot_err[:, 1] + plot_std[:, 1] * thickness, alpha=0.5)
    ax.fill_between(plot_mv, plot_err[:, 2] - plot_std[:, 2] * thickness,
                    plot_err[:, 2] + plot_std[:, 2] * thickness, alpha=0.5)
    plt.xlabel('scale in meters')
    plt.ylabel('a1')
    # plt.legend(['Ours', 'DeepV2D Eight View', 'Bts'], bbox_to_anchor=(0.1, 0.3))
    plt.legend(['Ours', 'DeepV2D Eight View', 'Bts'], loc='lower left')
    ax2 = ax.twinx()
    ax2.plot(plot_mv, plot_num, c='purple')
    # plt.legend(['Frame per Scale Percentage'], bbox_to_anchor=(0.6, 0.3))
    plt.legend(['Frame per Scale Percentage'], loc='lower right')
    plt.title("Error curve in KITTI")
    plt.savefig('/home/shengjie/Desktop/1.png', bbox_inches='tight', pad_inches=0, dpi=150)
    plt.close()
    plt.show()  # no-op: the figure was already closed above

    # NOTE: `bins` and `indices` are undefined below in the original source; a
    # plausible reconstruction (a scale histogram plus per-sample bin assignment)
    # would be:
    # bins = np.linspace(0, 3, 30)
    # indices = np.clip(np.digitize(mv_rec, bins) - 1, 0, bins.shape[0] - 1)
    ave_err_rec = np.zeros((bins.shape[0], 3))
    ave_err_rec_count = np.zeros((bins.shape[0], 1))
    for idx, indice in enumerate(indices):
        ave_err_rec[indice, 0] += err_rec[idx]
        ave_err_rec[indice, 1] += err_rec_deepv2d[idx]
        ave_err_rec[indice, 2] += err_rec_md[idx]
        ave_err_rec_count[indice, 0] += 1
    ave_err_rec = ave_err_rec / (ave_err_rec_count + 1e-6)

    plt.figure()
    plt.plot(bins, ave_err_rec[:, 0])
    plt.plot(bins, ave_err_rec[:, 1])
    plt.show()

    plt.figure()
    plt.scatter(mv_rec, err_rec)
    plt.scatter(mv_rec, err_rec_deepv2d)
    plt.show()
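# `compute_errors` is not shown in this file. The 9-element layout printed by the
# validation code later (silog, abs_rel, log10, rms, sq_rel, log_rms, d1, d2, d3)
# matches the widely used BTS-style helper; a sketch under that assumption:
import numpy as np

def compute_errors(gt, pred):
    thresh = np.maximum(gt / pred, pred / gt)
    d1, d2, d3 = [(thresh < 1.25 ** k).mean() for k in (1, 2, 3)]
    rms = np.sqrt(((gt - pred) ** 2).mean())
    log_rms = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    err = np.log(pred) - np.log(gt)
    silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100
    log10 = np.mean(np.abs(np.log10(pred) - np.log10(gt)))
    # err_rec above reads index [-3], i.e. d1, consistent with the 'a1' axis label
    return [silog, abs_rel, log10, rms, sq_rel, log_rms, d1, d2, d3]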
def forward(self, x):
    # squared ReLU: 0.5 * max(x, 0)^2
    return 0.5 * torch.clamp_min(x, 0) ** 2
def expmap0(self, u, c: Curvature):
    sqrt_c = c.c ** 0.5
    u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), self.min_norm)
    gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
    return gamma_1
def _logmap0(y, c):
    sqrt_c = c ** 0.5
    y_norm = torch.clamp_min(y.norm(dim=-1, p=2, keepdim=True), 1e-5)
    return y / y_norm / sqrt_c * artanh(sqrt_c * y_norm)
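# Round-trip sketch: paired with the `_expmap0` defined later in this file,
# `_logmap0` should invert it near the origin of the Poincare ball.
# `tanh`/`artanh` are assumed to be the usual elementwise helpers:
tanh = torch.tanh
def artanh(x):
    return 0.5 * torch.log((1 + x) / (1 - x))

u = 0.01 * torch.randn(8, 16)
c = torch.tensor(1.0)
assert torch.allclose(_logmap0(_expmap0(u, c), c), u, atol=1e-4)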
    # tail of a preceding (not shown) helper: the Heaviside step, i.e. the
    # subgradient of max(x, 0)
    return (x >= 0).type_as(x) * torch.ones_like(x)


H = 4
W = 4
inp = torch.rand(1, 4, H, W) * 2  # 2 / 4
# inp = torch.load('mnist_sample.pt')[None].double()

# inference
# (torch.autograd.Variable is deprecated; a plain tensor with
# requires_grad=True behaves identically)
clip = torch.tensor(1)
a1 = Variable(inp, requires_grad=True)
x = a1 - a1.mean([2, 3], keepdim=True)
x.retain_grad()
norm = torch.sqrt(torch.mean(x ** 2, dim=[2, 3], keepdim=True) + 1e-5)
norm.retain_grad()
inv_cnorm = 1 / torch.clamp_min(norm, clip)
inv_cnorm.retain_grad()
x_norm = x * inv_cnorm
x_norm.retain_grad()
c1 = x_norm.abs().sum()
c1.retain_grad()
c1.backward()

# instance norm
a2 = Variable(inp, requires_grad=True)
m = torch.nn.InstanceNorm2d(4)
c2 = m(a2).abs().sum()
c2.backward()

# calculate gradients
d_xnorm = x_norm.grad  # * weight
def train(gpu, ngpus_per_node, args):
    # NOTE: despite the name, this entry point only benchmarks inference speed
    # and memory; no parameters are updated.
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    model = EppFlowNet(args=args)
    model = torch.nn.DataParallel(model)
    model.cuda()

    print("=> loading checkpoint '{}'".format(args.restore_ckpt))
    loc = 'cuda:{}'.format(args.gpu)
    checkpoint = torch.load(args.restore_ckpt, map_location=loc)
    model.load_state_dict(checkpoint, strict=False)

    with open(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                           'eppflownet/pose_bin{}.pickle'.format(str(int(32 / args.num_angs)))), 'rb') as f:
        linlogdedge = pickle.load(f)
    minidx = np.argmin(np.abs(linlogdedge))
    print("Min index is: %d, val: %f" % (minidx, linlogdedge[minidx]))

    entries = read_splits(args)
    stidx = 0
    edidx = len(entries)

    eval_dataset = KITTI_odom(root=args.dataset_root, odomroot=args.odomroot, inheight=args.evalheight,
                              inwidth=args.evalwidth, entries=entries[stidx:edidx], maxinsnum=args.maxinsnum,
                              linlogdedge=linlogdedge, num_samples=args.num_angs,
                              prediction_root=args.prediction_root, ins_root=args.ins_root,
                              mdPred_root=args.mdPred_root, RANSACPose_root=args.RANSACPose_root,
                              istrain=False, isgarg=True)
    eval_loader = data.DataLoader(eval_dataset, batch_size=1, pin_memory=True, num_workers=3,
                                  drop_last=False, shuffle=False)

    model.eval()
    totnum = 0
    dr = 0
    with torch.no_grad():
        for val_id, data_blob in enumerate(eval_loader):
            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
            ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
            scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
            mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)

            posepred = posepred[:, :, 0]
            ang_decps_pad = ang_decps_pad[:, :, 0]
            scl_decps_pad = scl_decps_pad[:, :, 0]
            mvd_decps_pad = mvd_decps_pad[:, :, 0]

            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            # caveat: CUDA kernels launch asynchronously, so wall-clock timing
            # without torch.cuda.synchronize() can under-report per-frame cost
            st = time.time()
            outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred,
                            ang_decps_pad, scl_decps_pad, mvd_decps_pad, insmap)
            dr += time.time() - st
            totnum += 1
            print("%d Samples, Ave sec/frame: %f, Mem: %f Gb"
                  % (totnum, dr / totnum, float(torch.cuda.memory_allocated() / 1024 / 1024 / 1024)))
    return
def train(gpu, ngpus_per_node, args):
    print("Using GPU %d for training" % gpu)
    args.gpu = gpu

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=ngpus_per_node, rank=args.gpu)

    model = EppFlowNet(args=args)
    if args.distributed:
        torch.cuda.set_device(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(module=model)
        model = model.to(f'cuda:{args.gpu}')
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu],
                                                          find_unused_parameters=True,
                                                          output_device=args.gpu)
    else:
        model = torch.nn.DataParallel(model)
        model.cuda()

    logroot = os.path.join(args.logroot, args.name)
    print("Parameter Count: %d, saving location: %s" % (count_parameters(model), logroot))

    if args.restore_ckpt is not None:
        print("=> loading checkpoint '{}'".format(args.restore_ckpt))
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.restore_ckpt, map_location=loc)
        model.load_state_dict(checkpoint, strict=False)

    model.train()

    train_entries, evaluation_entries = read_splits()

    train_dataset = KITTI_eigen(root=args.dataset_root, inheight=args.inheight, inwidth=args.inwidth,
                                entries=train_entries, maxinsnum=args.maxinsnum, depth_root=args.depth_root,
                                depthvls_root=args.depthvlsgt_root, prediction_root=args.prediction_root,
                                ins_root=args.ins_root, istrain=True, muteaug=False,
                                banremovedup=False, isgarg=False)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, pin_memory=True,
                                   num_workers=int(args.num_workers / ngpus_per_node),
                                   drop_last=True, sampler=train_sampler)

    eval_dataset = KITTI_eigen(root=args.dataset_root, inheight=args.evalheight, inwidth=args.evalwidth,
                               entries=evaluation_entries, maxinsnum=args.maxinsnum, depth_root=args.depth_root,
                               depthvls_root=args.depthvlsgt_root, prediction_root=args.prediction_root,
                               ins_root=args.ins_root, istrain=False, isgarg=True)
    eval_sampler = torch.utils.data.distributed.DistributedSampler(eval_dataset) if args.distributed else None
    eval_loader = data.DataLoader(eval_dataset, batch_size=1, pin_memory=True, num_workers=3,
                                  drop_last=True, sampler=eval_sampler)

    print("Training splits contain %d images while test splits contain %d images"
          % (train_dataset.__len__(), eval_dataset.__len__()))

    if args.distributed:
        group = dist.new_group([i for i in range(ngpus_per_node)])

    optimizer, scheduler = fetch_optimizer(args, model, int(train_dataset.__len__() / 2))
    total_steps = 0

    if args.gpu == 0:
        logger = Logger(logroot)
        logger_evaluation = Logger(os.path.join(args.logroot, 'evaluation_eigen_background', args.name))
        logger_evaluation_org = Logger(os.path.join(args.logroot, 'evaluation_eigen_background',
                                                    "{}_org".format(args.name)))
        logger.create_summarywriter()
        logger_evaluation.create_summarywriter()
        logger_evaluation_org.create_summarywriter()

    VAL_FREQ = 5000
    epoch = 0
    maxa1 = 0

    silog_criterion = silog_loss(variance_focus=args.variance_focus)

    st = time.time()
    should_keep_training = True
    while should_keep_training:
        train_sampler.set_epoch(epoch)
        for i_batch, data_blob in enumerate(train_loader):
            optimizer.zero_grad()

            image1 = data_blob['img1'].cuda(gpu) / 255.0
            image2 = data_blob['img2'].cuda(gpu) / 255.0
            intrinsic = data_blob['intrinsic'].cuda(gpu)
            insmap = data_blob['insmap'].cuda(gpu)
            depthgt = data_blob['depthmap'].cuda(gpu)
            posepred = data_blob['posepred'].cuda(gpu)
            mD_pred = data_blob['depthpred'].cuda(gpu)

            mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

            outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred, insmap)

            depthloss, depthselector = get_depth_loss(depthgt=depthgt, mD_pred=mD_pred, outputs=outputs,
                                                      silog_criterion=silog_criterion)

            metrics = dict()
            metrics['depthloss'] = depthloss.item()

            loss = depthloss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            scheduler.step()

            if args.gpu == 0:
                logger.write_dict(metrics, step=total_steps)
                if total_steps % SUM_FREQ == 0:
                    dr = time.time() - st
                    resths = (args.num_steps - total_steps) * dr / (total_steps + 1) / 60 / 60
                    print("Step: %d, rest hour: %f, depthloss: %f" % (total_steps, resths, depthloss.item()))
                    logger.write_vls(data_blob, outputs, depthselector, total_steps)

            if total_steps % VAL_FREQ == 1:
                if args.gpu == 0:
                    results = validate_kitti(model.module, args, eval_loader, logger, group,
                                             total_steps, isorg=False)
                else:
                    results = validate_kitti(model.module, args, eval_loader, None, group,
                                             None, isorg=False)

                if args.gpu == 0:
                    logger_evaluation.write_dict(results, total_steps)
                    if maxa1 < results['d1']:
                        maxa1 = results['d1']
                        PATH = os.path.join(logroot, 'maxa1.pth')
                        torch.save(model.state_dict(), PATH)
                        print("model saved to %s" % PATH)

                if args.gpu == 0:
                    results = validate_kitti(model.module, args, eval_loader, None, group,
                                             total_steps, isorg=True)
                    logger_evaluation_org.write_dict(results, total_steps)
                else:
                    validate_kitti(model.module, args, eval_loader, None, group, None, isorg=True)

                model.train()

            total_steps += 1
            if total_steps > args.num_steps:
                should_keep_training = False
                break

        epoch = epoch + 1

    if args.gpu == 0:
        logger.close()
        PATH = os.path.join(logroot, 'final.pth')
        torch.save(model.state_dict(), PATH)

    return
def inference(self, tokens, token_lengths, mels_for_prosody, mel_lengths_for_prosody, speakers,
              mels_for_ge2e, pitches, pitch_lengths, noise_scale=1.0, length_scale=1.0):
    '''
    For inference.
    token: [Batch, Token_t]                            # Input text token
    token_lengths: [Batch]                             # Length of input text
    mels_for_prosody: [Batch, Mel_d, Mel_t]            # Input of prosody encoder
    mel_lengths_for_prosody: [Batch]                   # Length of input mel for prosody
    speakers: [Batch] or None                          # Index of speaker. Only when hp.Speaker_Embedding.Type.upper() == 'LUT'
    mels_for_ge2e: [Batch * Samples, Mel_d, Mel_SE_t]  # Input of speaker embedding
    noise_scale: scalar float
    length_scale: scalar float or [Batch]. (This may become a matrix later to control speed letter by letter)
    '''
    if 'LUT' in self.layer_Dict.keys():
        speakers = self.layer_Dict['LUT'](speakers)
    elif 'GE2E' in self.layer_Dict.keys():
        speakers = self.layer_Dict['GE2E'](mels_for_ge2e)
        speakers = GE2E_Normalize(speakers)
    else:
        speakers = None

    if 'Prosody_Encoder' in self.layer_Dict.keys():
        prosodies = self.layer_Dict['Prosody_Encoder'](mels_for_prosody, mel_lengths_for_prosody)
    else:
        prosodies = None

    if hp.Device != '-1':
        torch.cuda.synchronize()

    token_Masks = self.Mask_Generate(token_lengths)
    mean, log_Std, log_Durations, mask = self.layer_Dict['Encoder'](tokens, token_Masks, speakers, prosodies)

    # NOTE: despite the docstring, length_scale must already be a tensor here
    # for unsqueeze to work
    length_scale = length_scale.unsqueeze(-1).unsqueeze(-1)
    if hp.Device != '-1':
        torch.cuda.synchronize()

    durations = torch.ceil(torch.exp(log_Durations) * mask * length_scale).squeeze(1)
    mel_Lengths = torch.clamp_min(torch.sum(durations, dim=1), 1.0).long()
    mel_Masks = self.Mask_Generate(mel_Lengths)

    attention_Masks = torch.unsqueeze(token_Masks, -1) * torch.unsqueeze(mel_Masks, 2)
    attention_Masks = attention_Masks.squeeze(1)
    attentions = self.Path_Generate(durations, attention_Masks)  # [Batch, Token_t, Mel_t]
    if hp.Device != '-1':
        torch.cuda.synchronize()

    mel_Mean = mean @ attentions        # [Batch, Mel_Dim, Token_t] @ [Batch, Token_t, Mel_t] -> [Batch, Mel_dim, Mel_t]
    mel_Log_Std = log_Std @ attentions  # [Batch, Mel_Dim, Token_t] @ [Batch, Token_t, Mel_t] -> [Batch, Mel_dim, Mel_t]
    noises = torch.randn_like(mel_Mean) * noise_scale
    if hp.Device != '-1':
        torch.cuda.synchronize()

    z = (mel_Mean + torch.exp(mel_Log_Std) * noises) * mel_Masks

    if 'Pitch_Interpolater' in self.layer_Dict.keys():
        pitches = self.layer_Dict['Pitch_Interpolater'](pitches, pitch_lengths, mel_Lengths)
    else:
        pitches = None

    mels, _, mel_Masks = self.layer_Dict['Decoder'](z, mel_Masks, speakers, prosodies, pitches, reverse=True)
    if hp.Device != '-1':
        torch.cuda.synchronize()

    mels.masked_fill_(mel_Masks == 0.0, -hp.Sound.Max_Abs_Mel)

    return mels, mel_Lengths, attentions
def forward(self, input):
    return torch.clamp_min(input, self.min)
def _get_param(self, sp, sn):
    ap = torch.clamp_min(1 + self.m - sp, min=0.)
    an = torch.clamp_min(sn + self.m, min=0.)
    dp = 1 - self.m
    dn = self.m
    return ap, an, dp, dn
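# How these parameters plug into Circle loss (Sun et al., CVPR 2020) -- a
# self-contained sketch with the paper's m = 0.25 and a typical scale gamma = 256
# (ap/an are detached so the adaptive weights carry no gradient):
import torch
import torch.nn.functional as F

m, gamma = 0.25, 256.0
sp = torch.rand(32, 4)    # within-class similarities
sn = torch.rand(32, 60)   # between-class similarities
ap = torch.clamp_min(1 + m - sp.detach(), min=0.)
an = torch.clamp_min(sn.detach() + m, min=0.)
dp, dn = 1 - m, m
logit_p = -ap * (sp - dp) * gamma
logit_n = an * (sn - dn) * gamma
loss = F.softplus(torch.logsumexp(logit_n, dim=1) + torch.logsumexp(logit_p, dim=1)).mean()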
def _metrics(
    self,
    true_durs,
    true_text_len,
    pred_durs,
    true_pitch,
    pred_pitch,
    true_spect=None,
    pred_spect=None,
    true_spect_len=None,
    attn_logprob=None,
    attn_soft=None,
    attn_hard=None,
    attn_hard_dur=None,
):
    text_mask = get_mask_from_lengths(true_text_len)
    mel_mask = get_mask_from_lengths(true_spect_len)
    loss = 0.0

    # Dur loss and metrics
    durs_loss = F.mse_loss(pred_durs, (true_durs + 1).float().log(), reduction='none')
    durs_loss = durs_loss * text_mask.float()
    durs_loss = durs_loss.sum() / text_mask.sum()

    durs_pred = pred_durs.exp() - 1
    durs_pred = torch.clamp_min(durs_pred, min=0)
    durs_pred = durs_pred.round().long()

    acc = ((true_durs == durs_pred) * text_mask).sum().float() / text_mask.sum() * 100
    acc_dist_1 = (((true_durs - durs_pred).abs() <= 1) * text_mask).sum().float() / text_mask.sum() * 100
    acc_dist_3 = (((true_durs - durs_pred).abs() <= 3) * text_mask).sum().float() / text_mask.sum() * 100

    pred_spect = pred_spect.transpose(1, 2)

    # Mel loss
    mel_loss = F.mse_loss(pred_spect, true_spect, reduction='none').mean(dim=-2)
    mel_loss = mel_loss * mel_mask.float()
    mel_loss = mel_loss.sum() / mel_mask.sum()

    loss = loss + self.durs_loss_scale * durs_loss + self.mel_loss_scale * mel_loss

    # Aligner loss
    bin_loss, ctc_loss = None, None
    ctc_loss = self.forward_sum_loss(attn_logprob=attn_logprob, in_lens=true_text_len,
                                     out_lens=true_spect_len)
    loss = loss + ctc_loss
    if self.add_bin_loss:
        bin_loss = self.bin_loss(hard_attention=attn_hard, soft_attention=attn_soft)
        loss = loss + self.bin_loss_scale * bin_loss
    true_avg_pitch = average_pitch(true_pitch.unsqueeze(1), attn_hard_dur).squeeze(1)

    # Pitch loss
    pitch_loss = F.mse_loss(pred_pitch, true_avg_pitch, reduction='none')  # noqa
    pitch_loss = (pitch_loss * text_mask).sum() / text_mask.sum()

    loss = loss + self.pitch_loss_scale * pitch_loss

    return loss, durs_loss, acc, acc_dist_1, acc_dist_3, pitch_loss, mel_loss, ctc_loss, bin_loss
def trunc_mae(a, b, thres=0.01):  # mean absolute error
    return torch.clamp_min(torch.abs(a - b), thres).mean()
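# Usage sketch: residuals below `thres` are clamped, so they contribute a
# constant `thres` to the mean and no gradient -- a dead-zone on tiny errors.
a = torch.tensor([0.0, 1.0], requires_grad=True)
b = torch.tensor([0.005, 0.0])
loss = trunc_mae(a, b)   # mean(max(|a - b|, 0.01)) = mean([0.01, 1.0])
loss.backward()          # a.grad == [0.0, 0.5]: the small residual is gradient-free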
def inference(
    self,
    text: torch.Tensor,
    text_lengths: torch.Tensor,
    feats: Optional[torch.Tensor] = None,
    feats_lengths: Optional[torch.Tensor] = None,
    sids: Optional[torch.Tensor] = None,
    spembs: Optional[torch.Tensor] = None,
    lids: Optional[torch.Tensor] = None,
    dur: Optional[torch.Tensor] = None,
    noise_scale: float = 0.667,
    noise_scale_dur: float = 0.8,
    alpha: float = 1.0,
    max_len: Optional[int] = None,
    use_teacher_forcing: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Run inference.

    Args:
        text (Tensor): Input text index tensor (B, T_text,).
        text_lengths (Tensor): Text length tensor (B,).
        feats (Tensor): Feature tensor (B, aux_channels, T_feats,).
        feats_lengths (Tensor): Feature length tensor (B,).
        sids (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
        spembs (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).
        lids (Optional[Tensor]): Language index tensor (B,) or (B, 1).
        dur (Optional[Tensor]): Ground-truth duration (B, T_text,). If provided,
            skip the prediction of durations (i.e., teacher forcing).
        noise_scale (float): Noise scale parameter for flow.
        noise_scale_dur (float): Noise scale parameter for duration predictor.
        alpha (float): Alpha parameter to control the speed of generated speech.
        max_len (Optional[int]): Maximum length of acoustic feature sequence.
        use_teacher_forcing (bool): Whether to use teacher forcing.

    Returns:
        Tensor: Generated waveform tensor (B, T_wav).
        Tensor: Monotonic attention weight tensor (B, T_feats, T_text).
        Tensor: Duration tensor (B, T_text).

    """
    # encoder
    x, m_p, logs_p, x_mask = self.text_encoder(text, text_lengths)
    g = None
    if self.spks is not None:
        # (B, global_channels, 1)
        g = self.global_emb(sids.view(-1)).unsqueeze(-1)
    if self.spk_embed_dim is not None:
        # (B, global_channels, 1)
        g_ = self.spemb_proj(F.normalize(spembs.unsqueeze(0))).unsqueeze(-1)
        g = g_ if g is None else g + g_
    if self.langs is not None:
        # (B, global_channels, 1)
        g_ = self.lang_emb(lids.view(-1)).unsqueeze(-1)
        g = g_ if g is None else g + g_

    if use_teacher_forcing:
        # forward posterior encoder
        z, m_q, logs_q, y_mask = self.posterior_encoder(feats, feats_lengths, g=g)

        # forward flow
        z_p = self.flow(z, y_mask, g=g)  # (B, H, T_feats)

        # monotonic alignment search
        s_p_sq_r = torch.exp(-2 * logs_p)  # (B, H, T_text)
        # (B, 1, T_text)
        neg_x_ent_1 = torch.sum(-0.5 * math.log(2 * math.pi) - logs_p, [1], keepdim=True)
        # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
        neg_x_ent_2 = torch.matmul(-0.5 * (z_p**2).transpose(1, 2), s_p_sq_r)
        # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
        neg_x_ent_3 = torch.matmul(z_p.transpose(1, 2), (m_p * s_p_sq_r))
        # (B, 1, T_text)
        neg_x_ent_4 = torch.sum(-0.5 * (m_p**2) * s_p_sq_r, [1], keepdim=True)
        # (B, T_feats, T_text)
        neg_x_ent = neg_x_ent_1 + neg_x_ent_2 + neg_x_ent_3 + neg_x_ent_4
        # (B, 1, T_feats, T_text)
        attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
        # monotonic attention weight: (B, 1, T_feats, T_text)
        attn = self.maximum_path(neg_x_ent, attn_mask.squeeze(1)).unsqueeze(1)
        dur = attn.sum(2)  # (B, 1, T_text)

        # forward decoder with random segments
        wav = self.decoder(z * y_mask, g=g)
    else:
        # duration
        if dur is None:
            logw = self.duration_predictor(x, x_mask, g=g, inverse=True,
                                           noise_scale=noise_scale_dur)
            w = torch.exp(logw) * x_mask * alpha
            dur = torch.ceil(w)
        y_lengths = torch.clamp_min(torch.sum(dur, [1, 2]), 1).long()
        y_mask = make_non_pad_mask(y_lengths).unsqueeze(1).to(text.device)
        attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1)
        attn = self._generate_path(dur, attn_mask)

        # expand the length to match with the feature sequence
        # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
        m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2)
        # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
        logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1, 2)

        # decoder
        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
        z = self.flow(z_p, y_mask, g=g, inverse=True)
        wav = self.decoder((z * y_mask)[:, :, :max_len], g=g)

    return wav.squeeze(1), attn.squeeze(1), dur.squeeze(1)
def forward(self, x: Tensor, targets: Tensor) -> Tensor:
    # cal sp, sn from x
    batchSize = x.size(0)
    reordered_x = torch.cat((x.narrow(0, int(batchSize // 3), int(batchSize // 3)),
                             x.narrow(0, int(2 * batchSize // 3), int(batchSize // 3)),
                             x.narrow(0, 0, int(batchSize // 3))), 0)
    # regularization loss
    pos = (x * reordered_x.data).sum(1).div_(self.T).exp_()

    # get all innerproduct, remove diag
    all_prob = torch.mm(x, x.t().data).div_(self.T).exp_() * self.diag_mat
    all_div = all_prob.sum(1)

    lnPmt = torch.div(pos, all_div)

    # negative probability
    Pon_div = all_div.repeat(batchSize, 1)
    lnPon = torch.div(all_prob, Pon_div.t())
    lnPon = -lnPon.add(-1)

    sp = lnPmt
    sn = lnPon

    lnPon = lnPon.log_()
    lnPmt = lnPmt.log_()

    # ######################################## add alpha, beta
    ap = torch.clamp_min(-lnPmt.detach() + 1 + self.m, min=0.)
    an = torch.clamp_min(lnPon.detach() + self.m, min=0.)
    # delta_p = 1 - self.m
    delta_n = self.m

    # lnPmt = -ap * (sp - delta_p) * self.gamma
    lnPon = an * (sn - delta_n) * self.gamma

    lnPmt = lnPmt.exp_()
    lnPon = lnPon.exp_()

    #####################################################
    # equation 7 in ref. A (NCE paper)
    lnPon.log_()
    # also remove the pos term
    lnPon = lnPon.sum(1) - (-lnPmt.add(-1)).log_()
    lnPmt.log_()

    lnPmtsum = lnPmt.sum(0)
    lnPonsum = lnPon.sum(0)
    print(lnPmtsum, lnPonsum)
    loss = -(lnPmtsum + lnPonsum) / batchSize
    print("loss", loss)
    exit(0)

    # NOTE: everything below is unreachable debug code left in the original;
    # the exit(0) above terminates the process first.
    sp = lnPmt
    sn = lnPon
    ap = torch.clamp_min(-sp.detach() + 1 + self.m, min=0.)
    an = torch.clamp_min(sn.detach() + self.m, min=0.)
    delta_p = 1 - self.m
    delta_n = self.m
    # logit_p = -ap * (sp - delta_p) * self.gamma
    # logit_n = an * (sn - delta_n) * self.gamma
    logit_p = -sp
    logit_n = sn
    print(logit_n)
    print(logit_p)
    print(logit_n.shape)
    print(logit_p.shape)
    loss = self.soft_plus(torch.logsumexp(logit_n, dim=0) + torch.logsumexp(logit_p, dim=0))
    print("loss", loss / 300)
    exit(0)
def _expmap0(u, c):
    sqrt_c = c ** 0.5
    u_norm = torch.clamp_min(u.norm(dim=-1, p=2, keepdim=True), 1e-5)
    gamma_1 = tanh(sqrt_c * u_norm) * u / (sqrt_c * u_norm)
    return gamma_1
def forward(self, predicts, targets, embeds, step):
    """Computes loss.

    Parameters
    ----------
    predicts: torch.Tensor
        Predicted labels
    targets: torch.Tensor
        True labels
    embeds: torch.Tensor
        Embeddings of the inputs
    step: int
        Value to compute the annealing factor for triplet loss.

    Returns
    -------
    torch.Tensor
        computed loss
    """
    alpha = self.params['alpha']
    margin = self.params['margin']
    n_mini_batch_size = embeds[0].shape[0] // 2

    # ce loss
    ce_loss = torch.nn.CrossEntropyLoss()(predicts, targets)

    Triplet_loss_weight = anneal_function('logistic', step, self.params['triplet_anneal_k'],
                                          self.params['triplet_anneal_b'])

    # NOTE: the original checked self.params['type'] here while every other
    # branch reads self.params['loss_type']; unified to 'loss_type'.
    if self.params['loss_type'] == 'sdtw':
        # DTW loss (minimize DTW between duplicates, maximize it between non-duplicates)
        DTW_loss = torch.tensor([0]).float().to(embeds[0].device)
        for k in range(n_mini_batch_size):
            DTW_loss += torch.nn.functional.relu(
                self.sdtw(embeds[0][k], embeds[1][k])
                - self.sdtw(embeds[0][k + n_mini_batch_size], embeds[1][k + n_mini_batch_size])
                + margin)
        DTW_loss /= n_mini_batch_size
        Triplet_loss = DTW_loss
        loss = alpha * ce_loss + (1. - alpha) * Triplet_loss * Triplet_loss_weight
    elif self.params['loss_type'] == 'l2':
        L2_loss = torch.nn.functional.relu(
            torch.sum((embeds[0][:n_mini_batch_size, -1, :]
                       - embeds[1][:n_mini_batch_size, -1, :]) ** 2, dim=-1)
            - torch.sum((embeds[0][:n_mini_batch_size, -1, :]
                         - embeds[1][n_mini_batch_size:, -1, :]) ** 2, dim=-1)
            + margin).sum()
        L2_loss /= n_mini_batch_size
        loss = alpha * ce_loss + (1. - alpha) * L2_loss * Triplet_loss_weight
    elif self.params['loss_type'] == 'cos_hinge':
        Cos_hinge_loss = torch.clamp_min(
            -torch.nn.CosineSimilarity(dim=-1)(embeds[0][:n_mini_batch_size, -1, :],
                                               embeds[1][:n_mini_batch_size, -1, :])
            + torch.nn.CosineSimilarity(dim=-1)(embeds[0][:n_mini_batch_size, -1, :],
                                                embeds[1][n_mini_batch_size:, -1, :])
            + margin, 0).sum()
        Cos_hinge_loss += torch.clamp_min(
            -torch.nn.CosineSimilarity(dim=-1)(embeds[0][:n_mini_batch_size, -1, :],
                                               embeds[1][:n_mini_batch_size, -1, :])
            + torch.nn.CosineSimilarity(dim=-1)(embeds[1][:n_mini_batch_size, -1, :],
                                                embeds[1][n_mini_batch_size:, -1, :])
            + margin, 0).sum()
        Cos_hinge_loss /= (2 * n_mini_batch_size)
        loss = alpha * ce_loss + (1. - alpha) * Cos_hinge_loss * Triplet_loss_weight
    elif self.params['loss_type'] == 'ce':
        loss = ce_loss
    else:
        raise KeyError(f"Unknown loss type: {self.params['loss_type']}")
    return loss
def _project(x, c):
    norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), 1e-5)
    maxnorm = (1 - 1e-3)
    cond = norm > maxnorm
    projected = x / norm * maxnorm
    return torch.where(cond, projected, x)
def tanh_grad(x):
    # log of d/dx tanh(x) = log(1 - tanh(x)^2), clamped for numerical stability
    x = torch.clamp_min(1 - torch.tanh(x) ** 2, min=1e-5)
    return torch.log(x)
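# Numerical check sketch: exp(tanh_grad(x)) should match the autograd derivative
# of tanh away from the clamp region (for |x| <= 3, 1 - tanh(x)^2 stays well
# above 1e-5):
x = torch.linspace(-3, 3, 7, requires_grad=True)
(g,) = torch.autograd.grad(torch.tanh(x).sum(), x)
assert torch.allclose(g, torch.exp(tanh_grad(x.detach())), atol=1e-6)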
def forward(self, *, x, x_len, dur_target=None, pitch_target=None, energy_target=None, spec_len=None):
    """
    Args:
        x: Input from the encoder.
        x_len: Length of the input.
        dur_target: Duration targets for the duration predictor. Needs to be passed in during training.
        pitch_target: Pitch targets for the pitch predictor. Needs to be passed in during training.
        energy_target: Energy targets for the energy predictor. Needs to be passed in during training.
        spec_len: Target spectrogram length. Needs to be passed in during training.
    """
    # Duration predictions (or ground truth) fed into Length Regulator to
    # expand the hidden states of the encoder embedding
    log_dur_preds = self.duration_predictor(x)
    log_dur_preds.masked_fill_(~get_mask_from_lengths(x_len), 0)
    # Output is Batch, Time
    if dur_target is not None:
        dur_out = self.length_regulator(x, dur_target)
    else:
        dur_preds = torch.clamp_min(torch.round(torch.exp(log_dur_preds)) - 1, 0).long()
        if not torch.sum(dur_preds, dim=1).bool().all():
            logging.error("Duration prediction failed on this batch. Setting durations to 1.")
            dur_preds += 1
        dur_out = self.length_regulator(x, dur_preds)
        spec_len = torch.sum(dur_preds, dim=1)
    out = dur_out
    out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

    # Pitch
    pitch_preds = None
    if self.pitch:
        # Possible future work:
        # Add pitch spectrogram prediction & conversion back to pitch contour using iCWT
        # (see Appendix C of the FastSpeech 2/2s paper).
        pitch_preds = self.pitch_predictor(dur_out)
        pitch_preds.masked_fill_(~get_mask_from_lengths(spec_len), 0)
        if pitch_target is not None:
            pitch_out = self.pitch_lookup(torch.bucketize(pitch_target, self.pitch_bins))
        else:
            pitch_out = self.pitch_lookup(torch.bucketize(pitch_preds.detach(), self.pitch_bins))
        out += pitch_out
        out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

    # Energy
    energy_preds = None
    if self.energy:
        energy_preds = self.energy_predictor(dur_out)
        if energy_target is not None:
            energy_out = self.energy_lookup(torch.bucketize(energy_target, self.energy_bins))
        else:
            energy_out = self.energy_lookup(torch.bucketize(energy_preds.detach(), self.energy_bins))
        out += energy_out
        out *= get_mask_from_lengths(spec_len).unsqueeze(-1)

    return out, log_dur_preds, pitch_preds, energy_preds, spec_len
def compute_loss_v3(self, preds, ground_truth):
    """
    :param preds: [batch_size, 125, 13, 13]
    :param ground_truth: [batch_size, 13, 13, 5, 25]
    :return:
    """
    # grid_size format is [h, w]
    grid_size = [preds.shape[2], preds.shape[3]]
    # ratio's format is [h, w]
    ratio = torch.tensor([self.img_size[0] / grid_size[0], self.img_size[1] / grid_size[1]],
                         dtype=torch.float32)
    ratio = ratio.to(opt.device)

    xy_offset, pred_bboxes, pred_confs, pred_classes = self.reorg_layer(preds)

    # obj_mask marks the cells that contain an object
    # obj_mask: [batch_size, 13, 13, 5]
    obj_mask = ground_truth[..., 4].to(torch.bool)

    # ignore_mask marks predicted boxes whose best IoU against the gt boxes fails the threshold
    # ignore_mask: [batch_size, 13, 13, 5]
    ignore_mask = torch.empty_like(obj_mask)
    for i in range(preds.size(0)):
        # [13, 13, 5, 4] & [13, 13, 5] -> [M/4, 4]
        # valid_bbox: [M, 4]
        valid_bbox = ground_truth[i, ..., :4][obj_mask[i]]
        # valid_bbox = torch.masked_select(ground_truth[i, ..., :4], obj_mask[i, ..., None]).reshape(-1, 4)

        # ious: [13, 13, 5, M]
        # [13, 13, 5, 4] & [M, 4] -> [13, 13, 5, M]
        ious = yolov2_bbox_iou(pred_bboxes[i], valid_bbox)
        # best_iou: [13, 13, 5]
        best_iou = torch.max(ious, dim=-1)[0]
        ignore_mask[i] = torch.lt(best_iou, opt.best_iou_threshold)

    # pred_xy: [batch_size, 13, 13, 5, 2]
    # pred_xy's and label_xy's format is [w, h]
    # reorg_layer rescaled pred_bboxes to the input-image scale, so pred_xy has to be
    # mapped back to the grid scale before computing the loss
    pred_xy = pred_bboxes[..., 0:2] / ratio.flip((0, )) - xy_offset
    true_xy = ground_truth[..., 0:2] / ratio.flip((0, )) - xy_offset

    # pred_wh: [batch_size, 13, 13, 5, 2]
    # divide by the anchors because reorg_layer multiplied the predicted w/h by them;
    # this recovers the model's raw outputs
    pred_twth = pred_bboxes[..., 2:4] / self.anchors
    true_twth = ground_truth[..., 2:4] / self.anchors

    # for numerical stability: keep zeros from producing -inf under the log
    # (the original used `.nonzero()` advanced indexing here, which mis-indexes
    # a multi-dimensional tensor; boolean masking is what was intended)
    pred_twth[pred_twth == 0.] = 1.
    true_twth[true_twth == 0.] = 1.
    pred_twth = torch.clamp_min(pred_twth, min=1e-9)
    true_twth = torch.clamp_min(true_twth, min=1e-9)
    # take the log because reorg_layer exponentiated pred_wh
    pred_twth = torch.log(pred_twth)
    true_twth = torch.log(true_twth)

    # box with smaller area has higher weight
    # [batch_size, 13, 13, 5]
    box_loss_scale = 2. - (ground_truth[..., 2] / self.img_size[1]) * (ground_truth[..., 3] / self.img_size[0])

    # xy and wh losses are computed only over boxes that contain an object
    # [batch_size, 13, 13, 5, 2] & [batch_size, 13, 13, 5, 1] & [batch_size, 13, 13, 5, 1] -> [batch_size,13,13,5,1]
    obj_mask = obj_mask[..., None].to(torch.float32)
    xy_loss = torch.sum(torch.pow(true_xy - pred_xy, 2.) * obj_mask * box_loss_scale[..., None])
    wh_loss = torch.sum(torch.pow(true_twth - pred_twth, 2.) * obj_mask * box_loss_scale[..., None])

    # confidence loss for boxes that contain an object
    # [batch_size, 13, 13, 5, 1] & [batch_size,13,13,5,1] -> [batch_size,13,13,5,1]
    bce_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    conf_loss_obj = obj_mask * bce_loss(pred_confs, obj_mask)

    # confidence loss for boxes with no object whose best IoU also fails the threshold;
    # no-object boxes whose IoU with a gt box passes the threshold are excluded from the loss
    # ignore_mask: [batch_size, 13, 13, 5, 1]
    ignore_mask = ignore_mask[..., None].to(torch.float32)
    conf_loss_noobj = (1. - obj_mask) * ignore_mask * bce_loss(pred_confs, obj_mask)

    # total conf loss
    # [batch_size, 13, 13, 5, 1]
    conf_loss = conf_loss_obj + conf_loss_noobj
    if opt.use_focal_loss:
        focal_mask = self.focal_loss(labels=obj_mask, preds=pred_confs)
        conf_loss = torch.sum(focal_mask * conf_loss)
    else:
        conf_loss = torch.sum(conf_loss)

    # classification loss, only for boxes that contain an object
    if opt.use_smooth_labels:
        true_classes = self.smooth_labels(ground_truth[..., 5:], opt.coco_class_num)
    else:
        true_classes = ground_truth[..., 5:]
    # [batch_size,13,13,5] & [batch_size,13,13,5,20] & [batch_size,13,13,5,20] -> [batch_size,13,13,5,20]
    class_loss = torch.sum(obj_mask * bce_loss(pred_classes, true_classes))

    # get loss of single img
    total_loss = (xy_loss + wh_loss + conf_loss + class_loss) / opt.batch_size
    loss_dict = {
        'total_loss': total_loss,
        'xy_loss': xy_loss / opt.batch_size,
        'wh_loss': wh_loss / opt.batch_size,
        'conf_loss': conf_loss / opt.batch_size,
        'class_loss': class_loss / opt.batch_size
    }
    return loss_dict
def proj(self, x, c):
    norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), self.min_norm)
    maxnorm = (1 - self.eps[x.dtype]) / (c ** 0.5)
    cond = norm > maxnorm
    projected = x / norm * maxnorm
    return torch.where(cond, projected, x)
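# Invariant-check sketch mirroring `proj` above, with an assumed float32 eps of
# 4e-3 (the per-dtype `self.eps` table is not shown in this file): after
# projection every point lies inside the ball of radius (1 - eps) / sqrt(c).
x = 5 * torch.randn(100, 8)
c = 1.0
norm = torch.clamp_min(x.norm(dim=-1, keepdim=True, p=2), 1e-15)
maxnorm = (1 - 4e-3) / c ** 0.5
x_proj = torch.where(norm > maxnorm, x / norm * maxnorm, x)
assert (x_proj.norm(dim=-1) <= maxnorm + 1e-6).all()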
def forward(ctx, x: Tensor, min_val: float) -> Tensor:
    y = torch.clamp_min(x, min_val)
    return y
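# A plausible completion of the surrounding autograd.Function, sketched as a
# self-contained example (the backward rule is an assumption: pass the incoming
# gradient only where the input was not clamped):
import torch
from torch import Tensor

class ClampMin(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x: Tensor, min_val: float) -> Tensor:
        ctx.save_for_backward(x)
        ctx.min_val = min_val
        return torch.clamp_min(x, min_val)

    @staticmethod
    def backward(ctx, grad_out: Tensor):
        (x,) = ctx.saved_tensors
        # zero gradient in the clamped region; None for the non-tensor min_val
        return grad_out * (x >= ctx.min_val).type_as(grad_out), None

# x = torch.randn(5, dtype=torch.double, requires_grad=True)
# torch.autograd.gradcheck(lambda t: ClampMin.apply(t, 0.1), (x,))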
def validate_kitti(model, args, eval_loader, logger, group, total_steps, isdeepv2d=False):
    """ Perform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu
    eval_measures_depth = torch.zeros(10).cuda(device=gpu)
    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        depthgt = data_blob['depthmap'].cuda(gpu)

        if not args.initbymD:
            mD_pred = data_blob['depthpred'].cuda(gpu)
        else:
            mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

        if not isdeepv2d:
            outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred, insmap)
            predread = outputs[('depth', 2)]
        else:
            depthpred_deepv2d = data_blob['depthpred_deepv2d'].cuda(gpu)
            predread = depthpred_deepv2d
            # predread = data_blob['mdDepth_pred'].cuda(gpu)

        selector = ((depthgt > 0) * (predread > 0) * (depthgt > args.min_depth_eval)
                    * (depthgt < args.max_depth_eval)).float()
        predread = torch.clamp(predread, min=args.min_depth_eval, max=args.max_depth_eval)

        depth_gt_flatten = depthgt[selector == 1].cpu().numpy()
        pred_depth_flatten = predread[selector == 1].cpu().numpy()
        # median scaling against the ground truth
        pred_depth_flatten = np.median(depth_gt_flatten / pred_depth_flatten) * pred_depth_flatten

        eval_measures_depth_np = compute_errors(gt=depth_gt_flatten, pred=pred_depth_flatten)

        eval_measures_depth[:9] += torch.tensor(eval_measures_depth_np).cuda(device=gpu)
        eval_measures_depth[9] += 1

    if args.distributed:
        dist.all_reduce(tensor=eval_measures_depth, op=dist.ReduceOp.SUM, group=group)

    if args.gpu == 0:
        eval_measures_depth[0:9] = eval_measures_depth[0:9] / eval_measures_depth[9]
        eval_measures_depth = eval_measures_depth.cpu().numpy()
        print('Computing Depth errors for %f eval samples' % (eval_measures_depth[9].item()))
        print("{:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}".format(
            'silog', 'abs_rel', 'log10', 'rms', 'sq_rel', 'log_rms', 'd1', 'd2', 'd3'))
        for i in range(8):
            print('{:7.3f}, '.format(eval_measures_depth[i]), end='')
        print('{:7.3f}'.format(eval_measures_depth[8]))

        return {
            'silog': float(eval_measures_depth[0]),
            'abs_rel': float(eval_measures_depth[1]),
            'log10': float(eval_measures_depth[2]),
            'rms': float(eval_measures_depth[3]),
            'sq_rel': float(eval_measures_depth[4]),
            'log_rms': float(eval_measures_depth[5]),
            'd1': float(eval_measures_depth[6]),
            'd2': float(eval_measures_depth[7]),
            'd3': float(eval_measures_depth[8])
        }
    else:
        return None
def forward(self, x, x_mask, dr=None, g=None, reverse=False, noise_scale=1.0):
    """
    Shapes:
        - x: :math:`[B, C, T]`
        - x_mask: :math:`[B, 1, T]`
        - dr: :math:`[B, 1, T]`
        - g: :math:`[B, C]`
    """
    # condition encoder text
    x = self.pre(x)
    if g is not None:
        x = x + self.cond(g)
    x = self.convs(x, x_mask)
    x = self.proj(x) * x_mask

    if not reverse:
        flows = self.flows
        assert dr is not None

        # condition encoder duration
        h = self.post_pre(dr)
        h = self.post_convs(h, x_mask)
        h = self.post_proj(h) * x_mask
        noise = torch.randn(dr.size(0), 2, dr.size(2)).to(device=x.device, dtype=x.dtype) * x_mask
        z_q = noise

        # posterior encoder
        logdet_tot_q = 0.0
        for idx, flow in enumerate(self.post_flows):
            z_q, logdet_q = flow(z_q, x_mask, g=(x + h))
            logdet_tot_q = logdet_tot_q + logdet_q
            if idx > 0:
                z_q = torch.flip(z_q, [1])

        z_u, z_v = torch.split(z_q, [1, 1], 1)
        u = torch.sigmoid(z_u) * x_mask
        z0 = (dr - u) * x_mask

        # posterior encoder - neg log likelihood
        logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2])
        nll_posterior_encoder = (
            torch.sum(-0.5 * (math.log(2 * math.pi) + (noise ** 2)) * x_mask, [1, 2]) - logdet_tot_q
        )

        z0 = torch.log(torch.clamp_min(z0, 1e-5)) * x_mask
        logdet_tot = torch.sum(-z0, [1, 2])
        z = torch.cat([z0, z_v], 1)

        # flow layers
        for idx, flow in enumerate(flows):
            z, logdet = flow(z, x_mask, g=x, reverse=reverse)
            logdet_tot = logdet_tot + logdet
            if idx > 0:
                z = torch.flip(z, [1])

        # flow layers - neg log likelihood
        nll_flow_layers = (
            torch.sum(0.5 * (math.log(2 * math.pi) + (z ** 2)) * x_mask, [1, 2]) - logdet_tot
        )
        return nll_flow_layers + nll_posterior_encoder

    flows = list(reversed(self.flows))
    flows = flows[:-2] + [flows[-1]]  # remove a useless vflow
    z = torch.rand(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale
    for flow in flows:
        z = torch.flip(z, [1])
        z = flow(z, x_mask, g=x, reverse=reverse)

    z0, _ = torch.split(z, [1, 1], 1)
    logw = z0
    return logw
def compute_loss_v3(self, preds, ground_truth, anchor_base, img_size):
    """
    :param preds: [N, 125, 13, 13]
    :param ground_truth: [N, 13, 13, 5, 25]
    :param anchor_base: [5, 2]
    :param img_size: 416
    :return:
    """
    # grid_size format is [h, w]
    N = preds.size(0)
    grid_size = preds.shape[2]
    bce_no_reduce = torch.nn.BCEWithLogitsLoss(reduction='none')
    # ratio's format is [h, w]
    ratio = (img_size / grid_size).float().to(self.opt.device)

    xy_offset, pred_bboxes, pred_confs, pred_classes = self.reorg_layer(preds, anchor_base, img_size)

    # obj_mask marks the cells that contain an object
    # obj_mask: [N, 13, 13, 5]
    obj_mask = ground_truth[..., 4].bool()

    # ignore_mask: skip the conf loss for predicted boxes whose IoU with any gt box
    # exceeds the threshold (e.g. 0.6)
    # ignore_mask: [N, 13, 13, 5]
    ignore_mask = torch.zeros_like(obj_mask).bool()
    for i in range(N):
        # [13, 13, 5, 4] & [13, 13, 5] -> [M/4, 4]
        # valid_bbox: [M, 4] / [ctr_x, ctr_y, w, h]
        valid_bbox = ground_truth[i, ..., :4][obj_mask[i]]
        # valid_bbox = torch.masked_select(ground_truth[i, ..., :4], obj_mask[i, ..., None]).reshape(-1, 4)

        # ious: [13, 13, 5, M]
        # [13, 13, 5, 4] & [M, 4] -> [13, 13, 5, M]
        ious = yolov2_bbox_iou(pred_bboxes[i], valid_bbox)
        # best_iou: [13, 13, 5]
        max_iou, _ = torch.max(ious, dim=-1)
        ignore_mask[i] = max_iou.lt(self.opt.pos_iou_thresh)

    # pred_xy: [N, 13, 13, 5, 2]; pred_xy's and label_xy's format is [w, h]
    # reorg_layer rescaled pred_bboxes to the input-image scale, so pred_xy has to be
    # mapped back to the grid scale before computing the loss
    pred_dxdy = (pred_bboxes[..., 0:2] / ratio) - xy_offset
    true_dxdy = (ground_truth[..., 0:2] / ratio) - xy_offset

    # pred_wh: [N, 13, 13, 5, 2]
    # divide by the anchors because reorg_layer multiplied the predicted w/h by them;
    # this recovers the model's raw outputs
    pred_twth = pred_bboxes[..., 2:4] / anchor_base
    true_twth = ground_truth[..., 2:4] / anchor_base

    # for numerical stability: keep zeros from producing -inf under the log
    pred_twth[pred_twth == 0.] = 1.
    true_twth[true_twth == 0.] = 1.
    # take the log because reorg_layer exponentiated pred_wh
    pred_twth = torch.clamp_min(pred_twth, min=1e-9).log()
    true_twth = torch.clamp_min(true_twth, min=1e-9).log()

    # box with smaller area has higher weight
    # [N, 13, 13, 5]
    loc_loss_weight = 1.5 - (ground_truth[..., 2] / img_size) * (ground_truth[..., 3] / img_size)
    assert (loc_loss_weight <= 1.5).all()

    # xy and wh losses are computed only over boxes that contain an object
    # [N, 13, 13, 5, 2] & [N, 13, 13, 5, 1] & [N, 13, 13, 5, 1] -> [N,13,13,5,1]
    obj_mask = obj_mask[..., None].float()
    dxdy_loss = torch.pow(true_dxdy - pred_dxdy, 2.) * obj_mask * loc_loss_weight[..., None]
    dxdy_loss = self.opt.reg_scale * dxdy_loss.sum() / N
    twth_loss = torch.pow(true_twth - pred_twth, 2.) * obj_mask * loc_loss_weight[..., None]
    twth_loss = self.opt.reg_scale * twth_loss.sum() / N

    # confidence loss for boxes that contain an object
    # [N, 13, 13, 5, 1] & [N,13,13,5,1] -> [N,13,13,5,1]
    conf_loss_obj = obj_mask * bce_no_reduce(pred_confs, obj_mask)

    # confidence loss for boxes with no object and IoU below the threshold against every gt box
    # ignore_mask: [N, 13, 13, 5, 1]
    ignore_mask = ignore_mask[..., None].float()
    conf_loss_noobj = (1. - obj_mask) * ignore_mask * bce_no_reduce(pred_confs, obj_mask)

    # total conf loss
    # [batch_size, 13, 13, 5, 1]
    conf_loss = self.opt.obj_scale * conf_loss_obj + self.opt.noobj_scale * conf_loss_noobj
    if self.opt.use_focal_loss:
        focal_mask = self.focal_loss(labels=obj_mask, preds=pred_confs)
        conf_loss = (focal_mask * conf_loss).sum() / N
    else:
        conf_loss = conf_loss.sum() / N

    # classification loss, only for boxes that contain an object
    if self.opt.use_smooth_labels:
        true_classes = self.smooth_labels(ground_truth[..., 5:], self.opt.voc_class_num)
    else:
        true_classes = ground_truth[..., 5:]
    # [batch_size,13,13,5] & [batch_size,13,13,5,20] & [batch_size,13,13,5,20] -> [batch_size,13,13,5,20]
    class_loss = obj_mask * bce_no_reduce(pred_classes, true_classes)
    class_loss = self.opt.cls_scale * class_loss.sum() / N

    total_loss = dxdy_loss + twth_loss + conf_loss + class_loss
    loss_list = [dxdy_loss, twth_loss, conf_loss, class_loss, total_loss]
    self.update_meters(loss_list)
    return total_loss
def forward(self, pred, target):
    # focal-style binary cross-entropy on probabilities; the logs are clamped
    # at -100 to avoid -inf when pred hits exactly 0 or 1
    losses = -(((1 - pred) ** self.gamma) * target * torch.clamp_min(torch.log(pred), -100)
               + (pred ** self.zeta) * (1 - target) * torch.clamp_min(torch.log(1 - pred), -100))
    return torch.mean(losses)
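# Sanity-check sketch: with gamma = zeta = 0 the focal weights vanish and the
# expression reduces to plain binary cross-entropy on probabilities:
import torch
import torch.nn.functional as F

pred = torch.rand(16).clamp(1e-4, 1 - 1e-4)
target = (torch.rand(16) > 0.5).float()
plain = -(target * torch.clamp_min(pred.log(), -100)
          + (1 - target) * torch.clamp_min((1 - pred).log(), -100)).mean()
assert torch.allclose(plain, F.binary_cross_entropy(pred, target), atol=1e-5)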
def validate_kitti(model, args, eval_loader, group, seqmap):
    """ Perform validation using the KITTI-2015 (train) split """
    model.eval()
    gpu = args.gpu

    pred_pose_recs = dict()
    for k in seqmap.keys():
        local_eval_num = int(seqmap[k]['enid']) - int(seqmap[k]['stid'])
        pred_pose_recs[k] = torch.zeros(local_eval_num, 4, 4).cuda(device=gpu)

    for val_id, data_blob in enumerate(tqdm(eval_loader)):
        image1 = data_blob['img1'].cuda(gpu) / 255.0
        image2 = data_blob['img2'].cuda(gpu) / 255.0
        intrinsic = data_blob['intrinsic'].cuda(gpu)
        insmap = data_blob['insmap'].cuda(gpu)
        posepred = data_blob['posepred'].cuda(gpu)
        mD_pred = data_blob['mdDepth_pred'].cuda(gpu)
        ang_decps_pad = data_blob['ang_decps_pad'].cuda(gpu)
        scl_decps_pad = data_blob['scl_decps_pad'].cuda(gpu)
        mvd_decps_pad = data_blob['mvd_decps_pad'].cuda(gpu)

        if args.banins:
            insmap = insmap * 0

        mD_pred_clipped = torch.clamp_min(mD_pred, min=args.min_depth_pred)

        posepred = posepred[:, :, 0]
        ang_decps_pad = ang_decps_pad[:, :, 0]
        scl_decps_pad = scl_decps_pad[:, :, 0]
        mvd_decps_pad = mvd_decps_pad[:, :, 0]

        outputs = model(image1, image2, mD_pred_clipped, intrinsic, posepred,
                        ang_decps_pad, scl_decps_pad, mvd_decps_pad, insmap)

        for k in range(len(data_blob['tag'])):
            posepred = outputs[('afft_all', 2)][k, -1]
            tag = data_blob['tag'][k]
            seq = tag.split(' ')[0].split('/')[1][0:21]
            frmid = int(tag.split(' ')[1]) - int(seqmap[seq]['stid'])
            pred_pose_recs[seq][frmid] = posepred

    for k in seqmap.keys():
        dist.all_reduce(tensor=pred_pose_recs[k], op=dist.ReduceOp.SUM, group=group)

    if args.gpu == 0:
        tot_err = dict()
        tot_err['positions_pred'] = 0
        tot_err['positions_RANSAC'] = 0
        tot_err['positions_Deepv2d'] = 0
        tot_err['positions_RANSAC_Deepv2dscale'] = 0
        tot_err['positions_RANSAC_Odomscale'] = 0

        for s in seqmap.keys():
            posrec = dict()
            pred_poses = pred_pose_recs[s].cpu().numpy()

            RANSAC_poses = list()
            for k in range(int(seqmap[s]['stid']), int(seqmap[s]['enid'])):
                RANSAC_pose_path = os.path.join(args.RANSACPose_root, "000", s[0:10], s + "_sync",
                                                'image_02', "{}.pickle".format(str(k).zfill(10)))
                RANSAC_pose = pickle.load(open(RANSAC_pose_path, "rb"))
                RANSAC_poses.append(RANSAC_pose[0])

            Deepv2d_poses = list()
            for k in range(int(seqmap[s]['stid']), int(seqmap[s]['enid'])):
                Deepv2d_pose_path = os.path.join(args.deepv2dPose_root, s[0:10], s + "_sync",
                                                 'posepred', "{}.txt".format(str(k).zfill(10)))
                Deepv2d_pose = read_deepv2d_pose(Deepv2d_pose_path)
                Deepv2d_poses.append(Deepv2d_pose)

            gtposes_sourse = readlines(os.path.join(project_rootdir, 'exp_poses/kittiodom_gt/poses',
                                                    "{}.txt".format(str(seqmap[s]['mapid']).zfill(2))))
            gtposes = list()
            for gtpose_src in gtposes_sourse:
                gtpose = np.eye(4).flatten()
                for numstridx, numstr in enumerate(gtpose_src.split(' ')):
                    gtpose[numstridx] = float(numstr)
                gtpose = np.reshape(gtpose, [4, 4])
                gtposes.append(gtpose)

            relposes = list()
            for k in range(len(gtposes) - 1):
                relposes.append(np.linalg.inv(gtposes[k + 1]) @ gtposes[k])

            calib_dir = os.path.join(args.dataset_root, "{}".format(s[0:10]))
            cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt'))
            velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt'))
            imu2cam = read_calib_file(os.path.join(calib_dir, 'calib_imu_to_velo.txt'))
            intrinsic, extrinsic = get_intrinsic_extrinsic(cam2cam, velo2cam, imu2cam)

            positions_odom = list()
            scale_odom = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for r in relposes:
                accumP = r @ accumP
                positions_odom.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_odom.append(np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10))
            positions_odom = np.array(positions_odom)
            scale_odom = np.array(scale_odom)

            positions_pred = list()
            scale_pred = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for p in pred_poses:
                accumP = p @ accumP
                positions_pred.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_pred.append(np.sqrt(np.sum(p[0:3, 3] ** 2) + 1e-10))
            positions_pred = np.array(positions_pred)
            scale_pred = np.array(scale_pred)

            positions_RANSAC = list()
            scale_RANSAC = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for r in RANSAC_poses:
                accumP = r @ accumP
                positions_RANSAC.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_RANSAC.append(np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10))
            positions_RANSAC = np.array(positions_RANSAC)
            scale_RANSAC = np.array(scale_RANSAC)

            positions_Deepv2d = list()
            scale_Deepv2d = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for d in Deepv2d_poses:
                accumP = d @ accumP
                positions_Deepv2d.append((np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
                scale_Deepv2d.append(np.sqrt(np.sum(d[0:3, 3] ** 2) + 1e-10))
            positions_Deepv2d = np.array(positions_Deepv2d)
            scale_Deepv2d = np.array(scale_Deepv2d)

            positions_RANSAC_Deepv2dscale = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for i, r in enumerate(RANSAC_poses):
                r[0:3, 3] = r[0:3, 3] / np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10) \
                            * np.sqrt(np.sum(Deepv2d_poses[i][0:3, 3] ** 2) + 1e-10)
                accumP = r @ accumP
                positions_RANSAC_Deepv2dscale.append(
                    (np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
            positions_RANSAC_Deepv2dscale = np.array(positions_RANSAC_Deepv2dscale)

            positions_RANSAC_Odomscale = list()
            stpos = np.array([[0, 0, 0, 1]]).T
            accumP = np.eye(4)
            for i, r in enumerate(RANSAC_poses):
                r[0:3, 3] = r[0:3, 3] / np.sqrt(np.sum(r[0:3, 3] ** 2) + 1e-10) \
                            * np.sqrt(np.sum(relposes[i][0:3, 3] ** 2) + 1e-10)
                accumP = r @ accumP
                positions_RANSAC_Odomscale.append(
                    (np.linalg.inv(extrinsic) @ np.linalg.inv(accumP) @ stpos)[0:3, 0])
            positions_RANSAC_Odomscale = np.array(positions_RANSAC_Odomscale)

            posrec['positions_pred'] = positions_pred
            posrec['positions_RANSAC'] = positions_RANSAC
            posrec['positions_Deepv2d'] = positions_Deepv2d
            posrec['positions_RANSAC_Deepv2dscale'] = positions_RANSAC_Deepv2dscale
            posrec['positions_RANSAC_Odomscale'] = positions_RANSAC_Odomscale

            scalerec = dict()
            scalerec['scale_pred'] = scale_pred
            scalerec['scale_RANSAC'] = scale_RANSAC
            scalerec['scale_Deepv2d'] = scale_Deepv2d

            print("============= %s ============" % (s))
            print("In total %d images," % positions_odom.shape[0])
            for k in posrec.keys():
                err_odom = np.mean(np.sqrt(np.sum((posrec[k] - positions_odom) ** 2, axis=1)))
                if 'scale_{}'.format(k.split('_')[1]) in scalerec.keys():
                    err_scale = np.mean(np.abs(scalerec['scale_{}'.format(k.split('_')[1])] - scale_odom))
                else:
                    err_scale = np.nan
                tot_err[k] += err_odom * len(pred_poses)
                print("%s, err_odom: %f, err_scale: %f" % (k, err_odom.item(), err_scale.item()))

        return {'absl': float(tot_err['positions_pred'].item())}
    else:
        return None
def backprop(self, hps, obs, actions, old_logprobs, returns, value_loss_scale, advantages,
             old_values, action_masks, old_probs, privileged_obs, split_reward):
    if self.fp16:
        advantages = advantages.half()
        returns = returns.half()
        action_masks = action_masks.half()
        old_logprobs = old_logprobs.half()

    action_masks = action_masks[:, :self.agents, :]
    x, (pitems, pmask) = self.latents(obs, privileged_obs)
    batch_size = x.size()[0]

    vin = x.max(dim=1).values.view(batch_size, self.d_agent * self.hps.dff_ratio)
    if self.hps.use_privileged:
        pitems_max = pitems.max(dim=1).values
        pitems_avg = pitems.sum(dim=1) / torch.clamp_min((~pmask).float().sum(dim=1), min=1).unsqueeze(-1)
        vin = torch.cat([vin, pitems_max, pitems_avg], dim=1)
    values = self.value_head(vin).view(-1)

    logits = self.policy_head(x)
    probs = F.softmax(logits, dim=2)
    probs = probs.view(-1, self.agents, self.naction)

    # add small value to prevent degenerate probability distribution when no action is possible
    # gradients still get blocked by the action mask
    # TODO: mask actions by setting logits to -inf?
    probs = probs * action_masks + self.epsilon

    active_agents = torch.clamp_min((action_masks.sum(dim=2) > 0).float().sum(dim=1), min=1)

    dist = distributions.Categorical(probs)
    entropy = dist.entropy()
    logprobs = dist.log_prob(actions)
    ratios = torch.exp(logprobs - old_logprobs)
    advantages = advantages.view(-1, 1)
    if split_reward:
        advantages = advantages / active_agents.view(-1, 1)
    vanilla_policy_loss = advantages * ratios
    clipped_policy_loss = advantages * torch.clamp(ratios, 1 - hps.cliprange, 1 + hps.cliprange)
    if hps.ppo:
        policy_loss = -torch.min(vanilla_policy_loss, clipped_policy_loss).mean()
    else:
        policy_loss = -vanilla_policy_loss.mean()

    # TODO: do over full distribution, not just selected actions?
    approxkl = 0.5 * (old_logprobs - logprobs).pow(2).mean()
    clipfrac = ((ratios - 1.0).abs() > hps.cliprange).sum().type(torch.float32) / ratios.numel()

    clipped_values = old_values + torch.clamp(values - old_values, -hps.cliprange, hps.cliprange)
    vanilla_value_loss = (values - returns) ** 2
    clipped_value_loss = (clipped_values - returns) ** 2
    if hps.clip_vf:
        value_loss = torch.max(vanilla_value_loss, clipped_value_loss).mean()
    else:
        value_loss = vanilla_value_loss.mean()

    entropy_loss = -hps.entropy_bonus * entropy.mean()

    loss = policy_loss + value_loss_scale * value_loss + entropy_loss
    loss /= hps.batches_per_update
    loss.backward()

    return policy_loss.data.tolist(), value_loss.data.tolist(), \
        approxkl.data.tolist(), clipfrac.data.tolist()