Example 1
import torch
from torch.utils.data import DataLoader
# NOTE: CodePredictor, Autoencoder, Refiner, JTAValidationSet, Conf, utils,
# nms3d_cuda and the pose helpers below are project-local modules whose exact
# import paths are omitted in this excerpt


def results(cnf):
    # type: (Conf) -> None
    """
    Shows a visual representation of the obtained results
    using the test set images as input
    """

    # init Code Predictor
    code_predictor = CodePredictor()
    code_predictor.to(cnf.device)
    code_predictor.eval()
    code_predictor.requires_grad(False)
    code_predictor.load_w(f'log/{cnf.exp_name}/best.pth')

    # init Decoder
    autoencoder = Autoencoder(pretrained=True)
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)

    # init Hole Filler
    refiner = Refiner(pretrained=True)
    refiner.to(cnf.device)
    refiner.eval()
    refiner.requires_grad(False)

    # init data loader
    ts = JTAValidationSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    for step, sample in enumerate(loader):

        x, _, fx, fy, cx, cy, frame_path = sample

        x = x.to(cnf.device)
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = code_predictor.forward(x).unsqueeze(0)

        # code --> [decode] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search with cuda kernel] --> pseudo-3D coordinates
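        # (NOTE: a CUDA-free alternative to this kernel is the
        #  utils.local_maxima_3d helper used in Examples 3 and 5, e.g.
        #  `utils.local_maxima_3d(hmap_pred, threshold=cnf.nms_th,
        #   device=cnf.device, ret_confs=True)`)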
        pseudo3d_coords_pred = []
        confidences = []
        for jtype, hmp in enumerate(hmap_pred):
            suppressed_hmap = nms3d_cuda.NMSFilter3d(
                torch.nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nonzero_coords = torch.nonzero(suppressed_hmap).cpu()
            for coord in nonzero_coords:
                confidence = suppressed_hmap[tuple(coord)]
                if confidence > cnf.nms_th:
                    pseudo3d_coords_pred.append(
                        (jtype, coord[0].item(), coord[1].item(),
                         coord[2].item()))
                    confidences.append(confidence.cpu())

        # pseudo-3D coordinates --> [reverse projection] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(pseudo3d_coords_pred)):
            joint_type, cam_dist, y2d, x2d = pseudo3d_coords_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d,
                                                       y2d,
                                                       cam_dist,
                                                       q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d,
                                       y2d,
                                       cam_dist,
                                       fx=fx,
                                       fy=fy,
                                       cx=cx,
                                       cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))
        filter_joints(coords3d_pred, duplicate_th=0.05)

        # real 3D coordinates --> [association] --> list of poses
        poses = joint_association(coords3d_pred)

        # 3D poses -> [refiner] -> refined 3D poses
        refined_poses = []
        for _pose in poses:
            refined_pose = refine_pose(pose=_pose, refiner=refiner)
            if refined_pose is not None:
                refined_poses.append(refined_pose)

        # show output
        print(f'\n\t▶▶ Showing results of \'{frame_path[0]}\'')
        print(f'\t▶▶ It may take some time: please wait')
        print(f'\t▶▶ Close mayavi window to continue')
        show_poses(refined_poses)
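
For reference, a minimal entry point for this snippet, assuming the
Conf(exp_name=...) constructor shown in Example 4 ('default' is just a
placeholder experiment name):

if __name__ == '__main__':
    results(Conf(exp_name='default'))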
Example 2
import torch
import torch.nn as nn
import numpy as np
from models import Autoencoder, Generator
from dataset import Corpus

#####################
# Generating data
#####################

ds = Corpus()
vocab = ds.vocab

generator = Generator(20, 100)
generator.eval()
generator.load_state_dict(torch.load('generator.th', map_location='cpu'))

autoencoder = Autoencoder(100, 600, 200, 100, vocab.size(), 0.5, 22)
autoencoder.eval()
autoencoder.load_state_dict(torch.load('autoencoder.th', map_location='cpu'))

# sample noise
noise = torch.FloatTensor(np.random.normal(0, 1, (1, 100)))
z = generator(noise[None, :, :])  # add batch dim: (1, 100) -> (1, 1, 100)

# decode the latent code into a new sentence
logits = autoencoder.decode(z).squeeze()
seq = logits.argmax(dim=0)  # greedy decoding: most likely token per position
print(ds.decode(seq))
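
To sample several sentences, the same generate-decode round trip can simply be
repeated; this sketch reuses only the calls already shown above:

for _ in range(5):
    noise = torch.FloatTensor(np.random.normal(0, 1, (1, 100)))
    z = generator(noise[None, :, :])
    logits = autoencoder.decode(z).squeeze()
    print(ds.decode(logits.argmax(dim=0)))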
Example 3
# NOTE: this excerpt assumes the usual imports (torch, nn, optim, DataLoader,
# SummaryWriter, numpy as np, json, math, time, datetime) as well as the
# project's own CodePredictor, Autoencoder, JTA* dataset and utils modules


class Trainer(object):
    def __init__(self, cnf):
        # type: (Conf) -> None
        self.cnf = cnf

        # init code predictor
        self.code_predictor = CodePredictor()
        self.code_predictor = self.code_predictor.to(cnf.device)

        # init volumetric heatmap autoencoder
        self.autoencoder = Autoencoder()
        self.autoencoder.eval()
        self.autoencoder.requires_grad(False)
        self.autoencoder = self.autoencoder.to(cnf.device)

        # init optimizer
        self.optimizer = optim.Adam(params=self.code_predictor.parameters(),
                                    lr=cnf.lr)

        # init dataset(s)
        training_set = JTATrainingSet(cnf)
        test_set = JTAValidationSet(cnf)

        # init train/test loader
        self.train_loader = DataLoader(training_set,
                                       cnf.batch_size,
                                       num_workers=cnf.n_workers,
                                       shuffle=True)
        self.test_loader = DataLoader(test_set,
                                      batch_size=1,
                                      num_workers=cnf.n_workers,
                                      shuffle=False)

        # init logging stuffs
        self.log_path = cnf.exp_log_path
        print(f'tensorboard --logdir={cnf.project_log_path.abspath()}\n')
        self.sw = SummaryWriter(self.log_path)
        self.log_freq = len(self.train_loader)

        # starting values
        self.epoch = 0
        self.best_test_f1 = None

        # possibly load checkpoint
        self.load_ck()

    def load_ck(self):
        """
        load training checkpoint
        """
        ck_path = self.log_path / 'training.ck'
        if ck_path.exists():
            ck = torch.load(ck_path, map_location=torch.device('cpu'))
            print(f'[loading checkpoint \'{ck_path}\']')
            self.epoch = ck['epoch']
            self.code_predictor.load_state_dict(ck['model'])
            self.best_test_f1 = ck['best_test_f1']
            self.optimizer.load_state_dict(ck['optimizer'])

    def save_ck(self):
        """
        save training checkpoint
        """
        ck = {
            'epoch': self.epoch,
            'model': self.code_predictor.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_test_f1': self.best_test_f1
        }
        torch.save(ck, self.log_path / 'training.ck')

    def train(self):
        """
        train model for one epoch on the Training-Set.
        """
        self.code_predictor.train()
        self.code_predictor.requires_grad(True)

        train_losses = []
        times = []
        start_time = time()
        t = time()
        for step, sample in enumerate(self.train_loader):
            self.optimizer.zero_grad()

            x, y_true = sample
            x, y_true = x.to(self.cnf.device), y_true.to(self.cnf.device)

            y_pred = self.code_predictor.forward(x)
            loss = nn.MSELoss()(y_pred, y_true)
            loss.backward()
            train_losses.append(loss.item())

            self.optimizer.step()

            # print an incredible progress bar
            progress = (step + 1) / self.cnf.epoch_len
            progress_bar = ('█' *
                            int(50 * progress)) + ('┈' *
                                                   (50 - int(50 * progress)))
            times.append(time() - t)
            t = time()
            if self.cnf.log_each_step or progress == 1:
                print(
                    '\r[{}] Epoch {:0{e}d}.{:0{s}d}: │{}│ {:6.2f}% │ Loss: {:.6f} │ ↯: {:5.2f} step/s'
                    .format(
                        datetime.now().strftime("%m-%d@%H:%M"),
                        self.epoch,
                        step + 1,
                        progress_bar,
                        100 * progress,
                        np.mean(train_losses),
                        1 / np.mean(times),
                        e=math.ceil(math.log10(self.cnf.epochs)),
                        s=math.ceil(math.log10(self.cnf.epoch_len)),
                    ),
                    end='')

            if step >= self.cnf.epoch_len - 1:
                break

        # log average loss of this epoch
        mean_epoch_loss = np.mean(train_losses)  # type: float
        self.sw.add_scalar(tag='train/loss',
                           scalar_value=mean_epoch_loss,
                           global_step=self.epoch)

        # log epoch duration
        print(f' │ T: {time() - start_time:.2f} s')

    def test(self):
        """
        test model on the Validation-Set
        """

        self.code_predictor.eval()
        self.code_predictor.requires_grad(False)

        t = time()
        test_prs = []
        test_res = []
        test_f1s = []
        for step, sample in enumerate(self.test_loader):
            x, coords3d_true, fx, fy, cx, cy, _ = sample

            fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()
            x = x.to(self.cnf.device)
            coords3d_true = json.loads(coords3d_true[0])

            # image --> [code_predictor] --> code
            code_pred = self.code_predictor.forward(x).unsqueeze(0)

            # code --> [decode] --> hmap(s)
            hmap_pred = self.autoencoder.decode(code_pred).squeeze()

            # hmap --> [local_maxima_3d] --> rescaled pseudo-3D coordinates
            coords2d_pred = utils.local_maxima_3d(hmaps3d=hmap_pred,
                                                  threshold=0.1,
                                                  device=self.cnf.device)

            # rescaled pseudo-3D coordinates --> [to_3d] --> real 3D coordinates
            coords3d_pred = []
            for i in range(len(coords2d_pred)):
                joint_type, cam_dist, y2d, x2d = coords2d_pred[i]
                x2d, y2d, cam_dist = utils.rescale_to_real(x2d=x2d,
                                                           y2d=y2d,
                                                           cam_dist=cam_dist,
                                                           q=self.cnf.q)
                x3d, y3d, z3d = utils.to3d(x2d=x2d,
                                           y2d=y2d,
                                           cam_dist=cam_dist,
                                           fx=fx,
                                           fy=fy,
                                           cx=cx,
                                           cy=cy)
                coords3d_pred.append((joint_type, x3d, y3d, z3d))

            # real 3D coordinates --> [joint_det_metrics] --> PR / RE / F1
            metrics = joint_det_metrics(points_pred=coords3d_pred,
                                        points_true=coords3d_true,
                                        th=self.cnf.det_th)
            pr, re, f1 = metrics['pr'], metrics['re'], metrics['f1']
            test_prs.append(pr)
            test_res.append(re)
            test_f1s.append(f1)

        # log average loss on test set
        mean_test_pr = float(np.mean(test_prs))
        mean_test_re = float(np.mean(test_res))
        mean_test_f1 = float(np.mean(test_f1s))

        # print test metrics
        print(
            f'\t● AVG (PR, RE, F1) on TEST-set: '
            f'({mean_test_pr * 100:.2f}, '
            f'{mean_test_re * 100:.2f}, '
            f'{mean_test_f1 * 100:.2f}) ',
            end='')
        print(f'│ T: {time() - t:.2f} s')

        self.sw.add_scalar(tag='test/precision',
                           scalar_value=mean_test_pr,
                           global_step=self.epoch)
        self.sw.add_scalar(tag='test/recall',
                           scalar_value=mean_test_re,
                           global_step=self.epoch)
        self.sw.add_scalar(tag='test/f1',
                           scalar_value=mean_test_f1,
                           global_step=self.epoch)

        # save best model
        if self.best_test_f1 is None or mean_test_f1 >= self.best_test_f1:
            self.best_test_f1 = mean_test_f1
            torch.save(self.code_predictor.state_dict(),
                       self.log_path / 'best.pth')

    def run(self):
        """
        start model training procedure (train > test > checkpoint > repeat)
        """
        for _ in range(self.epoch, self.cnf.epochs):
            self.train()
            self.test()
            self.epoch += 1
            self.save_ck()
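
A minimal launcher for this class, again assuming the Conf(exp_name=...)
constructor shown in Example 4 ('default' is a placeholder):

if __name__ == '__main__':
    Trainer(cnf=Conf(exp_name='default')).run()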
Example 4
def compute(exp_name):
    # type: (str) -> None

    cnf = Conf(exp_name=exp_name)

    # init Code Predictor
    predictor = CodePredictor()  # type: BaseModel
    predictor.to(cnf.device)
    predictor.eval()
    predictor.requires_grad(False)
    predictor.load_w(cnf.exp_log_path / 'best.pth')

    # init Decoder
    autoencoder = Autoencoder()  # type: BaseModel
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)
    autoencoder.load_w(Path(__file__).parent / 'models/weights/vha.pth')

    # init Hole Filler
    hole_filler = Refiner(pretrained=True)
    hole_filler.to(cnf.device)
    hole_filler.eval()
    hole_filler.requires_grad(False)
    hole_filler.load_w(
        Path(__file__).parent / 'models/weights/pose_refiner.pth')

    # init data loader
    ts = JTATestingSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    metrics_dict = {}
    for th in THS:
        for key in ['pr', 're', 'f1']:
            metrics_dict[f'{key}@{th}'] = []  # without refinement
            metrics_dict[f'{key}@{th}+'] = []  # with refinement

    for step, sample in enumerate(loader):

        x, coords3d_true, fx, fy, cx, cy, frame_path = sample
        x = x.to(cnf.device)
        coords3d_true = json.loads(coords3d_true[0])
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = predictor.forward(x).unsqueeze(0)

        # code --> [decoder] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search] --> pseudo-3D coordinates
        coords2d_pred = []
        confs = []
        for jtype, hmp in enumerate(hmap_pred):  # already squeezed above
            res = nms3d_cuda.NMSFilter3d(nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nz = torch.nonzero(res).cpu()
            for el in nz:
                confid = res[tuple(el)]
                if confid > 0.1:
                    coords2d_pred.append(
                        (jtype, el[0].item(), el[1].item(), el[2].item()))
                    confs.append(confid.cpu())

        # pseudo-3D coordinates --> [to_3d] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(coords2d_pred)):
            joint_type, cam_dist, y2d, x2d = coords2d_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d,
                                                       y2d,
                                                       cam_dist,
                                                       q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d,
                                       y2d,
                                       cam_dist,
                                       fx=fx,
                                       fy=fy,
                                       cx=cx,
                                       cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))

        # real 3D coordinates --> [association] --> list of poses
        poses = coords_to_poses(coords3d_pred, confs)

        # a solitary joint is a joint that has been excluded from the association
        # process since no valid connection could be found;
        # note that only solitary joints with a confidence value >0.6 are considered
        all_pose_joints = []
        for pose in poses:
            all_pose_joints += [(j.type, j.confidence, j.x3d, j.y3d, j.z3d)
                                for j in pose]
        coords3d_pred_ = [(c[0], confs[k], c[1], c[2], c[3])
                          for k, c in enumerate(coords3d_pred)]
        solitary = [(s[0], s[2], s[3], s[4])
                    for s in (set(coords3d_pred_) - set(all_pose_joints))
                    if s[1] > 0.6]

        # list of poses --> [hole filler] --> refined list of poses
        refined_poses = []
        for pose in poses:
            confidences = [j.confidence for j in pose]
            pose = [(joint.type, joint.x3d, joint.y3d, joint.z3d)
                    for joint in pose]
            refined_pose = hole_filler.refine(pose=pose,
                                              hole_th=0.2,
                                              confidences=confidences,
                                              replace_th=1)
            refined_poses.append(refined_pose)

        # refined list of poses --> [flatten] --> refined_coords3d_pred
        refined_coords3d_pred = []
        for pose in refined_poses:
            refined_coords3d_pred += pose

        # compute metrics without refinement
        for th in THS:
            __m = joint_det_metrics(points_pred=coords3d_pred,
                                    points_true=coords3d_true,
                                    th=th)
            for key in ['pr', 're', 'f1']:
                metrics_dict[f'{key}@{th}'].append(__m[key])

        # compute metrics with refinement
        for th in THS:
            __m = joint_det_metrics(points_pred=refined_coords3d_pred +
                                    solitary,
                                    points_true=coords3d_true,
                                    th=th)
            for key in ['pr', 're', 'f1']:
                metrics_dict[f'{key}@{th}+'].append(__m[key])

        # print test progress
        print(f'\r>> processing test image {step + 1} of {len(loader)}', end='')

    print('\r', end='')
    for th in THS:
        print(f'(PR, RE, F1)@{th}:'
              f'\tno_ref=('
              f'{np.mean(metrics_dict[f"pr@{th}"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"re@{th}"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"f1@{th}"]) * 100:.2f})'
              f'\twith_ref=('
              f'{np.mean(metrics_dict[f"pr@{th}+"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"re@{th}+"]) * 100:.2f}, '
              f'{np.mean(metrics_dict[f"f1@{th}+"]) * 100:.2f}) ')
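
Note that THS is a module-level list of detection thresholds defined outside
this excerpt; the values in this sketch are purely illustrative placeholders,
not the original ones:

THS = [0.4, 0.8, 1.2]  # hypothetical thresholds, not the authors' values

if __name__ == '__main__':
    compute(exp_name='default')  # placeholder experiment name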
Example 5
def results(cnf):
    # type: (Conf) -> None
    """
    Shows a visual representation of the obtained results
    using the test set images as input
    """

    # init Code Predictor
    code_predictor = CodePredictor()
    code_predictor.to(cnf.device)
    code_predictor.eval()
    code_predictor.requires_grad(False)
    code_predictor.load_w(f'log/{cnf.exp_name}/best.pth')

    # init Decoder
    autoencoder = Autoencoder(pretrained=True)
    autoencoder.to(cnf.device)
    autoencoder.eval()
    autoencoder.requires_grad(False)

    # init Hole Filler
    refiner = Refiner(pretrained=True)
    refiner.to(cnf.device)
    refiner.eval()
    refiner.requires_grad(False)

    # init data loader
    ts = JTATestSet(cnf=cnf)
    loader = DataLoader(dataset=ts, batch_size=1, shuffle=False, num_workers=0)

    for step, sample in enumerate(loader):

        x, _, fx, fy, cx, cy, frame_path = sample

        x = x.to(cnf.device)
        fx, fy, cx, cy = fx.item(), fy.item(), cx.item(), cy.item()

        # image --> [code_predictor] --> code
        code_pred = code_predictor.forward(x).unsqueeze(0)

        # code --> [decode] --> hmap
        hmap_pred = autoencoder.decode(code_pred).squeeze()

        # hmap --> [local maxima search] --> pseudo-3D coordinates
        # (pure-PyTorch alternative, kept commented out for reference):
        # coords2d_pred, confs = utils.local_maxima_3d(hmap_pred, threshold=0.2, device=cnf.device, ret_confs=True)

        # hmap --> [local maxima search with cuda kernel] --> pseudo-3D coordinates
        coords2d_pred = []
        confs = []
        for jtype, hmp in enumerate(hmap_pred):
            res = nms3d_cuda.NMSFilter3d(
                torch.nn.ConstantPad3d(1, 0)(hmp), 3, 1)
            nz = torch.nonzero(res).cpu()
            for el in nz:
                confid = res[tuple(el)]
                if confid > 0.1:
                    coords2d_pred.append(
                        (jtype, el[0].item(), el[1].item(), el[2].item()))
                    confs.append(confid.cpu())

        # pseudo-3D coordinates --> [to_3d] --> real 3D coordinates
        coords3d_pred = []
        for i in range(len(coords2d_pred)):
            joint_type, cam_dist, y2d, x2d = coords2d_pred[i]
            x2d, y2d, cam_dist = utils.rescale_to_real(x2d, y2d, cam_dist,
                                                       q=cnf.q)
            x3d, y3d, z3d = utils.to3d(x2d,
                                       y2d,
                                       cam_dist,
                                       fx=fx,
                                       fy=fy,
                                       cx=cx,
                                       cy=cy)
            coords3d_pred.append((joint_type, x3d, y3d, z3d))

        # real 3D coordinates --> [association] --> list of poses
        poses = coords_to_poses(coords3d_pred, confs)

        # list of poses ---> [pose refiner] ---> refined list of poses
        refined_poses = []
        for pose in poses:
            confidences = [j.confidence for j in pose]
            pose = [(joint.type, joint.x3d, joint.y3d, joint.z3d)
                    for joint in pose]
            refined_pose = refiner.refine(pose=pose,
                                          hole_th=0.2,
                                          confidences=confidences,
                                          replace_th=1)
            refined_poses.append(refined_pose)

        # show input
        img = cv2.imread(frame_path[0])
        cv2.imshow('input image', img)
        cv2.waitKey(1)  # let OpenCV process GUI events so the window is drawn

        # show output
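        # (show_poses opens a blocking mayavi window; close it to advance to
        #  the next frame, as noted in Example 1)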
        show_poses(refined_poses)