def results(cnf):
    # type: (Conf) -> None
    """
    Shows a visual representation of the obtained results
    using the test set images as input
    """

    # init Code Predictor
    code_predictor = CodePredictor()
    code_predictor.to(cnf.device)
    code_predictor.eval()
    code_predictor.requires_grad(False)
    code_predictor.load_w(f'log/{cnf.exp_name}/best.pth')

    # init Decoder
    autoencoder = Autoencoder(pretrained=True)
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)

    # init Hole Filler
    refiner = Refiner(pretrained=True)
    refiner.to(cnf.device)
    refiner.eval()
    refiner.requires_grad(False)

    # init data loader
    ts = JTAValidationSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    for step, sample in enumerate(loader):
        x, _, fx, fy, cx, cy, frame_path = sample
        x = x.to(cnf.device)
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = code_predictor.forward(x).unsqueeze(0)

        # code --> [decode] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search with CUDA kernel] --> pseudo-3D coordinates
        pseudo3d_coords_pred = []
        confidences = []
        for jtype, hmp in enumerate(hmap_pred):
            suppressed_hmap = nms3d_cuda.NMSFilter3d(torch.nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nonzero_coords = torch.nonzero(suppressed_hmap).cpu()
            for coord in nonzero_coords:
                confidence = suppressed_hmap[tuple(coord)]
                if confidence > cnf.nms_th:
                    pseudo3d_coords_pred.append((jtype, coord[0].item(), coord[1].item(), coord[2].item()))
                    confidences.append(confidence.cpu())

        # pseudo-3D coordinates --> [reverse projection] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(pseudo3d_coords_pred)):
            joint_type, cam_dist, y2d, x2d = pseudo3d_coords_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d, y2d, cam_dist, q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d, y2d, cam_dist, fx=fx, fy=fy, cx=cx, cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))
        filter_joints(coords3d_pred, duplicate_th=0.05)

        # real 3D coordinates --> [association] --> list of poses
        poses = joint_association(coords3d_pred)

        # 3D poses --> [refiner] --> refined 3D poses
        refined_poses = []
        for _pose in poses:
            refined_pose = refine_pose(pose=_pose, refiner=refiner)
            if refined_pose is not None:
                refined_poses.append(refined_pose)

        # show output
        print(f'\n\t▶▶ Showing results of \'{frame_path[0]}\'')
        print('\t▶▶ It may take some time: please wait')
        print('\t▶▶ Close the mayavi window to continue')
        show_poses(refined_poses)
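# NOTE: `nms3d_cuda.NMSFilter3d` (used above) is a custom CUDA kernel that keeps
# only the local maxima of each 3D heatmap. As a point of reference, a pure-PyTorch
# sketch of the same local-maxima filter is given below; the function name `nms3d`
# and the exact neighborhood semantics of the kernel are assumptions, not part of
# the original codebase.

def nms3d(hmp, kernel_size=3, threshold=0.1):
    # type: (torch.Tensor, int, float) -> torch.Tensor
    import torch.nn.functional as F
    pad = kernel_size // 2
    # stride-1 max-pooling returns, for each voxel, the max of its neighborhood
    pooled = F.max_pool3d(hmp[None, None, ...], kernel_size, stride=1, padding=pad)[0, 0]
    # keep a voxel only if it is the neighborhood maximum and above threshold
    keep = (hmp == pooled) & (hmp > threshold)
    return hmp * keep

# `torch.nonzero(nms3d(hmp))` would then yield the same pseudo-3D peak
# coordinates that the inner loop above collects one by one.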
import torch
import torch.nn as nn
import numpy as np

from models import Autoencoder, Generator
from dataset import Corpus

#####################
# Generating data
#####################

ds = Corpus()
vocab = ds.vocab

generator = Generator(20, 100)
generator.eval()
generator.load_state_dict(torch.load('generator.th', map_location='cpu'))

autoencoder = Autoencoder(100, 600, 200, 100, vocab.size(), 0.5, 22)
autoencoder.eval()
autoencoder.load_state_dict(torch.load('autoencoder.th', map_location='cpu'))

# sample noise
noise = torch.FloatTensor(np.random.normal(0, 1, (1, 100)))
z = generator(noise[None, :, :])

# create a new sentence
logits = autoencoder.decode(z).squeeze()
seq = logits.argmax(dim=0)
print(ds.decode(seq))
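# A minimal extension of the script above (reusing the same `Generator`,
# `Autoencoder` and `Corpus` instances): draw several noise vectors to sample a
# handful of sentences instead of a single one. The sample count of 5 is arbitrary.
with torch.no_grad():  # inference only: no gradients needed
    for _ in range(5):
        noise = torch.FloatTensor(np.random.normal(0, 1, (1, 100)))
        z = generator(noise[None, :, :])
        logits = autoencoder.decode(z).squeeze()
        print(ds.decode(logits.argmax(dim=0)))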
class Trainer(object):

    def __init__(self, cnf):
        # type: (Conf) -> None
        self.cnf = cnf

        # init code predictor
        self.code_predictor = CodePredictor()
        self.code_predictor = self.code_predictor.to(cnf.device)

        # init volumetric heatmap autoencoder
        self.autoencoder = Autoencoder()
        self.autoencoder.eval()
        self.autoencoder.requires_grad(False)
        self.autoencoder = self.autoencoder.to(cnf.device)

        # init optimizer
        self.optimizer = optim.Adam(params=self.code_predictor.parameters(), lr=cnf.lr)

        # init dataset(s)
        training_set = JTATrainingSet(cnf)
        test_set = JTAValidationSet(cnf)

        # init train/test loader
        self.train_loader = DataLoader(training_set, cnf.batch_size, num_workers=cnf.n_workers, shuffle=True)
        self.test_loader = DataLoader(test_set, batch_size=1, num_workers=cnf.n_workers, shuffle=False)

        # init logging stuff
        self.log_path = cnf.exp_log_path
        print(f'tensorboard --logdir={cnf.project_log_path.abspath()}\n')
        self.sw = SummaryWriter(self.log_path)
        self.log_freq = len(self.train_loader)

        # starting values
        self.epoch = 0
        self.best_test_f1 = None

        # possibly load checkpoint
        self.load_ck()

    def load_ck(self):
        """
        load training checkpoint
        """
        ck_path = self.log_path / 'training.ck'
        if ck_path.exists():
            ck = torch.load(ck_path, map_location=torch.device('cpu'))
            print(f'[loading checkpoint \'{ck_path}\']')
            self.epoch = ck['epoch']
            self.code_predictor.load_state_dict(ck['model'])
            self.best_test_f1 = ck['best_test_f1']
            self.optimizer.load_state_dict(ck['optimizer'])

    def save_ck(self):
        """
        save training checkpoint
        """
        ck = {
            'epoch': self.epoch,
            'model': self.code_predictor.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_test_f1': self.best_test_f1
        }
        torch.save(ck, self.log_path / 'training.ck')

    def train(self):
        """
        train model for one epoch on the Training-Set
""" self.code_predictor.train() self.code_predictor.requires_grad(True) train_losses = [] times = [] start_time = time() t = time() for step, sample in enumerate(self.train_loader): self.optimizer.zero_grad() x, y_true = sample x, y_true = x.to(self.cnf.device), y_true.to(self.cnf.device) y_pred = self.code_predictor.forward(x) loss = nn.MSELoss()(y_pred, y_true) loss.backward() train_losses.append(loss.item()) self.optimizer.step(None) # print an incredible progress bar progress = (step + 1) / self.cnf.epoch_len progress_bar = ('█' * int(50 * progress)) + ('┈' * (50 - int(50 * progress))) times.append(time() - t) t = time() if self.cnf.log_each_step or (not self.cnf.log_each_step and progress == 1): print( '\r[{}] Epoch {:0{e}d}.{:0{s}d}: │{}│ {:6.2f}% │ Loss: {:.6f} │ ↯: {:5.2f} step/s' .format( datetime.now().strftime("%m-%d@%H:%M"), self.epoch, step + 1, progress_bar, 100 * progress, np.mean(train_losses), 1 / np.mean(times), e=math.ceil(math.log10(self.cnf.epochs)), s=math.ceil(math.log10(self.cnf.epoch_len)), ), end='') if step >= self.cnf.epoch_len - 1: break # log average loss of this epoch mean_epoch_loss = np.mean(train_losses) # type: float self.sw.add_scalar(tag='train/loss', scalar_value=mean_epoch_loss, global_step=self.epoch) # log epoch duration print(f' │ T: {time() - start_time:.2f} s') def test(self): """ test model on the Validation-Set """ self.code_predictor.eval() self.code_predictor.requires_grad(False) t = time() test_prs = [] test_res = [] test_f1s = [] for step, sample in enumerate(self.test_loader): x, coords3d_true, fx, fy, cx, cy, _ = sample fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item() x = x.to(self.cnf.device) coords3d_true = json.loads(coords3d_true[0]) # image --> [code_predictor] --> code code_pred = self.code_predictor.forward(x).unsqueeze(0) # code --> [decode] --> hmap(s) hmap_pred = self.autoencoder.decode(code_pred).squeeze() # hmap --> [local_maxima_3d] --> rescaled pseudo-3D coordinates coords2d_pred = utils.local_maxima_3d(hmaps3d=hmap_pred, threshold=0.1, device=self.cnf.device) # rescaled pseudo-3D coordinates --> [to_3d] --> real 3D coordinates coords3d_pred = [] for i in range(len(coords2d_pred)): joint_type, cam_dist, y2d, x2d = coords2d_pred[i] x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d, y2d=y2d, cam_dist=cam_dist, q=self.cnf.q) x3d, y3d, z3d = utils.to3d(x2d=x2d, y2d=y2d, cam_dist=cam_dist, fx=fx, fy=fy, cx=cx, cy=cy) coords3d_pred.append((joint_type, x3d, y3d, z3d)) # real 3D metrics = joint_det_metrics(points_pred=coords3d_pred, points_true=coords3d_true, th=self.cnf.det_th) pr, re, f1 = metrics['pr'], metrics['re'], metrics['f1'] test_prs.append(pr) test_res.append(re) test_f1s.append(f1) # log average loss on test set mean_test_pr = float(np.mean(test_prs)) mean_test_re = float(np.mean(test_res)) mean_test_f1 = float(np.mean(test_f1s)) # print test metrics print( f'\t● AVG (PR, RE, F1) on TEST-set: ' f'({mean_test_pr * 100:.2f}, ' f'{mean_test_re * 100:.2f}, ' f'{mean_test_f1 * 100:.2f}) ', end='') print(f'│ T: {time() - t:.2f} s') self.sw.add_scalar(tag='test/precision', scalar_value=mean_test_pr, global_step=self.epoch) self.sw.add_scalar(tag='test/recall', scalar_value=mean_test_re, global_step=self.epoch) self.sw.add_scalar(tag='test/f1', scalar_value=mean_test_f1, global_step=self.epoch) # save best model if self.best_test_f1 is None or mean_test_f1 >= self.best_test_f1: self.best_test_f1 = mean_test_f1 torch.save(self.code_predictor.state_dict(), self.log_path / 'best.pth') def run(self): """ start model 
        training procedure (train > test > checkpoint > repeat)
        """
        for _ in range(self.epoch, self.cnf.epochs):
            self.train()
            self.test()
            self.epoch += 1
            self.save_ck()
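# A typical entry point for the Trainer above might look like the sketch below;
# the `Conf` constructor argument and the 'default' experiment name are
# assumptions, since the original entry point is not shown in this section.
if __name__ == '__main__':
    cnf = Conf(exp_name='default')  # hypothetical experiment name
    trainer = Trainer(cnf=cnf)
    trainer.run()  # train > test > checkpoint > repeat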
def compute(exp_name):
    # type: (str) -> None

    cnf = Conf(exp_name=exp_name)

    # init Code Predictor
    predictor = CodePredictor()  # type: BaseModel
    predictor.to(cnf.device)
    predictor.eval()
    predictor.requires_grad(False)
    predictor.load_w(cnf.exp_log_path / 'best.pth')

    # init Decoder
    autoencoder = Autoencoder()  # type: BaseModel
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)
    autoencoder.load_w(Path(__file__).parent / 'models/weights/vha.pth')

    # init Hole Filler
    hole_filler = Refiner(pretrained=True)
    hole_filler.to(cnf.device)
    hole_filler.eval()
    hole_filler.requires_grad(False)
    hole_filler.load_w(Path(__file__).parent / 'models/weights/pose_refiner.pth')

    # init data loader
    ts = JTATestingSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    metrics_dict = {}
    for th in THS:
        for key in ['pr', 're', 'f1']:
            metrics_dict[f'{key}@{th}'] = []   # without refinement
            metrics_dict[f'{key}@{th}+'] = []  # with refinement

    for step, sample in enumerate(loader):
        x, coords3d_true, fx, fy, cx, cy, frame_path = sample
        x = x.to(cnf.device)
        coords3d_true = json.loads(coords3d_true[0])
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = predictor.forward(x).unsqueeze(0)

        # code --> [decoder] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search] --> pseudo-3D coordinates
        coords2d_pred = []
        confs = []
        for jtype, hmp in enumerate(hmap_pred.squeeze()):
            res = nms3d_cuda.NMSFilter3d(nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nz = torch.nonzero(res).cpu()
            for el in nz:
                confid = res[tuple(el)]
                if confid > 0.1:
                    coords2d_pred.append((jtype, el[0].item(), el[1].item(), el[2].item()))
                    confs.append(confid.cpu())

        # pseudo-3D coordinates --> [to_3d] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(coords2d_pred)):
            joint_type, cam_dist, y2d, x2d = coords2d_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d, y2d, cam_dist, q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d, y2d, cam_dist, fx=fx, fy=fy, cx=cx, cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))

        # real 3D coordinates --> [association] --> list of poses
        poses = coords_to_poses(coords3d_pred, confs)

        # a solitary joint is a joint that has been excluded from the association
        # process since no valid connection could be found;
        # note that only solitary joints with a confidence value > 0.6 are considered
        all_pose_joints = []
        for pose in poses:
            all_pose_joints += [(j.type, j.confidence, j.x3d, j.y3d, j.z3d) for j in pose]
        coords3d_pred_ = [(c[0], confs[k], c[1], c[2], c[3]) for k, c in enumerate(coords3d_pred)]
        solitary = [(s[0], s[2], s[3], s[4]) for s in (set(coords3d_pred_) - set(all_pose_joints)) if s[1] > 0.6]

        # list of poses --> [hole filler] --> refined list of poses
        refined_poses = []
        for person_id, pose in enumerate(poses):
            confidences = [j.confidence for j in pose]
            pose = [(joint.type, joint.x3d, joint.y3d, joint.z3d) for joint in pose]
            refined_pose = hole_filler.refine(pose=pose, hole_th=0.2, confidences=confidences, replace_th=1)
            refined_poses.append(refined_pose)

        # refined list of poses --> [flattening] --> refined_coords3d_pred
        refined_coords3d_pred = []
        for pose in refined_poses:
            refined_coords3d_pred += pose

        # compute metrics without refinement
        for th in THS:
            __m = joint_det_metrics(points_pred=coords3d_pred, points_true=coords3d_true, th=th)
            for key in ['pr', 're', 'f1']:
                metrics_dict[f'{key}@{th}'].append(__m[key])

        # compute metrics with refinement
        for th in THS:
            __m = joint_det_metrics(points_pred=refined_coords3d_pred + solitary,
                                    points_true=coords3d_true, th=th)
            for key in ['pr', 're', 'f1']:
                metrics_dict[f'{key}@{th}+'].append(__m[key])

        # print test progress
        print(f'\r>> processing test image {step + 1} of {len(loader)}', end='')

    print('\r', end='')
    for th in THS:
        print(f'(PR, RE, F1)@{th}:'
              f'\tno_ref=('
              f'{np.mean(metrics_dict[f"pr@{th}"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"re@{th}"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"f1@{th}"]) * 100:.2f})'
              f'\twith_ref=('
              f'{np.mean(metrics_dict[f"pr@{th}+"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"re@{th}+"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"f1@{th}+"]) * 100:.2f}) ')
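# A possible command-line wrapper for `compute`; the flag name is an assumption,
# as the original script's entry point is not included in this section.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='evaluate a trained model on the JTA testing set')
    parser.add_argument('--exp_name', type=str, required=True, help='name of the experiment to evaluate')
    args = parser.parse_args()
    compute(exp_name=args.exp_name)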
def results(cnf):
    # type: (Conf) -> None
    """
    Shows a visual representation of the obtained results
    using the test set images as input
    """

    # init Code Predictor
    code_predictor = CodePredictor()
    code_predictor.to(cnf.device)
    code_predictor.eval()
    code_predictor.requires_grad(False)
    code_predictor.load_w(f'log/{cnf.exp_name}/best.pth')

    # init Decoder
    autoencoder = Autoencoder(pretrained=True)
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)

    # init Hole Filler
    refiner = Refiner(pretrained=True)
    refiner.to(cnf.device)
    refiner.eval()
    refiner.requires_grad(False)

    # init data loader
    ts = JTATestSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    for step, sample in enumerate(loader):
        x, _, fx, fy, cx, cy, frame_path = sample
        x = x.to(cnf.device)
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = code_predictor.forward(x).unsqueeze(0)

        # code --> [decode] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search] --> pseudo-3D coordinates
        # coords2d_pred, confs = utils.local_maxima_3d(hmap_pred, threshold=0.2, device=cnf.device, ret_confs=True)

        # hmap --> [local maxima search with CUDA kernel] --> pseudo-3D coordinates
        coords2d_pred = []
        confs = []
        for jtype, hmp in enumerate(hmap_pred):
            res = nms3d_cuda.NMSFilter3d(torch.nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nz = torch.nonzero(res).cpu()
            for el in nz:
                confid = res[tuple(el)]
                if confid > 0.1:
                    coords2d_pred.append((jtype, el[0].item(), el[1].item(), el[2].item()))
                    confs.append(confid.cpu())

        # pseudo-3D coordinates --> [to_3d] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(coords2d_pred)):
            joint_type, cam_dist, y2d, x2d = coords2d_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d, y2d, cam_dist, q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d, y2d, cam_dist, fx=fx, fy=fy, cx=cx, cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))

        # real 3D coordinates --> [association] --> list of poses
        poses = coords_to_poses(coords3d_pred, confs)

        # list of poses --> [pose refiner] --> refined list of poses
        refined_poses = []
        for person_id, pose in enumerate(poses):
            confidences = [j.confidence for j in pose]
            pose = [(joint.type, joint.x3d, joint.y3d, joint.z3d) for joint in pose]
            refined_pose = refiner.refine(pose=pose, hole_th=0.2, confidences=confidences, replace_th=1)
            refined_poses.append(refined_pose)

        # show input
        img = cv2.imread(frame_path[0])
        cv2.imshow('input image', img)
        cv2.waitKey(1)  # give OpenCV a chance to actually draw the window

        # show output
        show_poses(refined_poses)
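# Throughout these scripts, `utils.to3d` back-projects an image point with an
# associated camera distance into 3D camera coordinates. Its implementation is
# not shown here; under a standard pinhole model with intrinsics (fx, fy, cx, cy)
# it would typically compute something like the sketch below. The name
# `to3d_pinhole` is hypothetical, and the sketch assumes `cam_dist` is the length
# of the viewing ray (if it were the z-depth instead, z3d would simply equal it).

def to3d_pinhole(x2d, y2d, cam_dist, fx, fy, cx, cy):
    # type: (float, float, float, float, float, float, float) -> tuple
    import math
    # direction of the viewing ray through pixel (x2d, y2d), in camera space
    dx = (x2d - cx) / fx
    dy = (y2d - cy) / fy
    # scale the ray so that the point lies at distance `cam_dist` from the camera
    z3d = cam_dist / math.sqrt(dx * dx + dy * dy + 1.0)
    return dx * z3d, dy * z3d, z3d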