Example #1
    def _val(self):
        num_samples = len(self.ds_val)
        all_preds = np.zeros((num_samples, self.model_nof_joints, 3),
                             dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
        image_paths = []
        idx = 0
        self.model.eval()
        with torch.no_grad():
            for step, (image, target, target_weight, joints_data) in enumerate(
                    tqdm(self.dl_val, desc='Validating')):
                image = image.to(self.device)
                target = target.to(self.device)
                target_weight = target_weight.to(self.device)

                output = self.model(image)

                if self.flip_test_images:
                    image_flipped = flip_tensor(image, dim=-1)
                    output_flipped = self.model(image_flipped)

                    output_flipped = flip_back(output_flipped,
                                               self.ds_val.flip_pairs)

                    output = (output + output_flipped) * 0.5

                loss = self.loss_fn(output, target, target_weight)

                # Evaluate accuracy
                # Get predictions on the resized images (given as input)
                accs, avg_acc, cnt, joints_preds, joints_target = \
                    self.ds_train.evaluate_accuracy(output, target)

                # Number of images in the current batch
                num_images = image.shape[0]

                # Per-sample center, scale, and detector score from the annotations
                c = joints_data['center'].numpy()
                s = joints_data['scale'].numpy()
                score = joints_data['score'].numpy()
                pixel_std = 200  # ToDo Parametrize this

                preds, maxvals = get_final_preds(
                    True, output, c, s,
                    pixel_std)  # ToDo check what post_processing exactly does

                all_preds[idx:idx + num_images, :,
                          0:2] = preds[:, :, 0:2].detach().cpu().numpy()
                all_preds[idx:idx + num_images, :,
                          2:3] = maxvals.detach().cpu().numpy()
                # ToDo double-check the all_boxes layout
                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_paths.extend(joints_data['imgPath'])

                idx += num_images

                self.mean_loss_val += loss.item()
                self.mean_acc_val += avg_acc.item()
                if self.use_tensorboard:
                    self.summary_writer.add_scalar(
                        'val_loss',
                        loss.item(),
                        global_step=step + self.epoch * self.len_dl_val)
                    self.summary_writer.add_scalar(
                        'val_acc',
                        avg_acc.item(),
                        global_step=step + self.epoch * self.len_dl_val)
                    if step == 0:
                        save_images(image,
                                    target,
                                    joints_target,
                                    output,
                                    joints_preds,
                                    joints_data['joints_visibility'],
                                    self.summary_writer,
                                    step=step + self.epoch * self.len_dl_val,
                                    prefix='test_')

        self.mean_loss_val /= len(self.dl_val)
        self.mean_acc_val /= len(self.dl_val)

        # COCO evaluation
        print('\nVal AP/AR')
        self.val_accs, self.mean_mAP_val = self.ds_val.evaluate_overall_accuracy(
            all_preds, all_boxes, image_paths, output_dir=self.log_path)
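
For reference, flip_tensor and flip_back are helpers from the surrounding project: the flip test above mirrors the input horizontally, runs the model again, mirrors the resulting heatmaps back, and averages the two predictions. A minimal sketch of what these helpers typically do, assuming flip_pairs is an iterable of (left, right) joint-index pairs:

import torch

def flip_tensor(t, dim=-1):
    # Mirror the tensor along the given dimension (the image width here).
    return torch.flip(t, dims=[dim])

def flip_back(heatmaps, flip_pairs):
    # Mirror the heatmaps back to the original orientation, then swap the
    # channels of each left/right joint pair so that, e.g., the "left elbow"
    # channel lines up with the unflipped prediction.
    heatmaps = torch.flip(heatmaps, dims=[-1])
    for left, right in flip_pairs:
        tmp = heatmaps[:, left].clone()
        heatmaps[:, left] = heatmaps[:, right]
        heatmaps[:, right] = tmp
    return heatmaps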
Example #2
    def _train(self):

        num_samples = self.len_dl_train * self.batch_size
        all_preds = np.zeros((num_samples, self.model_nof_joints, 3),
                             dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
        image_paths = []
        idx = 0

        self.model.train()
        for step, (image, target, target_weight, joints_data) in enumerate(
                tqdm(self.dl_train, desc='Training')):
            image = image.to(self.device)
            target = target.to(self.device)
            target_weight = target_weight.to(self.device)

            self.optim.zero_grad()

            output = self.model(image)

            loss = self.loss_fn(output, target, target_weight)

            loss.backward()

            self.optim.step()

            # Evaluate accuracy
            # Get predictions on the resized images (given as input)
            accs, avg_acc, cnt, joints_preds, joints_target = \
                self.ds_train.evaluate_accuracy(output, target)

            # Number of images in the current batch
            num_images = image.shape[0]

            # Per-sample center, scale, and detector score from the annotations
            c = joints_data['center'].numpy()
            s = joints_data['scale'].numpy()
            score = joints_data['score'].numpy()
            pixel_std = 200  # ToDo Parametrize this

            # Get predictions on the original images
            preds, maxvals = get_final_preds(
                True, output.detach(), c, s,
                pixel_std)  # ToDo check what post_processing exactly does

            all_preds[idx:idx + num_images, :,
                      0:2] = preds[:, :, 0:2].detach().cpu().numpy()
            all_preds[idx:idx + num_images, :,
                      2:3] = maxvals.detach().cpu().numpy()
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_paths.extend(joints_data['imgPath'])

            idx += num_images

            self.mean_loss_train += loss.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('train_loss',
                                               loss.item(),
                                               global_step=step +
                                               self.epoch * self.len_dl_train)
                self.summary_writer.add_scalar('train_acc',
                                               avg_acc.item(),
                                               global_step=step +
                                               self.epoch * self.len_dl_train)
                if step == 0:
                    save_images(image,
                                target,
                                joints_target,
                                output,
                                joints_preds,
                                joints_data['joints_visibility'],
                                self.summary_writer,
                                step=step + self.epoch * self.len_dl_train,
                                prefix='train_')

        self.mean_loss_train /= len(self.dl_train)

        # COCO evaluation
        print('\nTrain AP/AR')

        self.train_accs, self.mean_mAP_train = self.ds_train.evaluate_overall_accuracy(
            all_preds, all_boxes, image_paths, output_dir=self.log_path)
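
get_final_preds projects heatmap-space maxima back to original-image coordinates using each sample's center, scale, and pixel_std. A simplified, hypothetical sketch of that projection (the real function can additionally apply sub-pixel post-processing, which is skipped here):

import torch

def heatmaps_to_image_coords(heatmaps, center, scale, pixel_std=200):
    # heatmaps: (batch, joints, h, w); center: (batch, 2); scale: (batch, 2)
    batch, n_joints, h, w = heatmaps.shape
    flat = heatmaps.detach().reshape(batch, n_joints, -1)
    maxvals, idx = torch.max(flat, dim=2)
    coords = torch.stack([(idx % w).float(), (idx // w).float()], dim=2)

    coords = coords.cpu().numpy()
    for i in range(batch):
        # scale * pixel_std is the person-box size in original-image pixels
        box_w, box_h = scale[i] * pixel_std
        coords[i, :, 0] = coords[i, :, 0] / w * box_w + center[i][0] - box_w / 2
        coords[i, :, 1] = coords[i, :, 1] / h * box_h + center[i][1] - box_h / 2
    return coords, maxvals.unsqueeze(-1).cpu().numpy()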
Example #3
    def _val(self):
        num_samples = len(self.ds_val)
        all_preds = np.zeros((num_samples, self.model_nof_joints, 3),
                             dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
        image_paths = []
        idx = 0
        self.model.eval()
        with torch.no_grad():
            for step, (image, target, target_weight, joints_data) in enumerate(
                    tqdm(self.dl_val, desc='Validating')):
                image = image.to(self.device)
                target = target.to(self.device)
                target_weight = target_weight.to(self.device)

                output = self.model(image)

                if self.flip_test_images:
                    image_flipped = flip_tensor(image, dim=-1)
                    output_flipped = self.model(image_flipped)

                    output_flipped = flip_back(output_flipped,
                                               self.ds_val.flip_pairs)

                    output = (output + output_flipped) * 0.5

                loss = self.loss_fn(output, target, target_weight)

                # Evaluate accuracy
                # Get predictions on the resized images (given as input)
                accs, avg_acc, cnt, joints_preds, joints_target = \
                    self.ds_train.evaluate_accuracy(output, target)

                # Number of images in the current batch
                num_images = image.shape[0]

                # Per-sample center, scale, and detector score from the annotations
                c = joints_data['center'].numpy()
                s = joints_data['scale'].numpy()
                score = joints_data['score'].numpy()
                pixel_std = 200

                preds, maxvals = get_final_preds(True, output, c, s, pixel_std)

                all_preds[idx:idx + num_images, :,
                          0:2] = preds[:, :, 0:2].detach().cpu().numpy()
                all_preds[idx:idx + num_images, :,
                          2:3] = maxvals.detach().cpu().numpy()

                all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
                all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
                all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
                all_boxes[idx:idx + num_images, 5] = score
                image_paths.extend(joints_data['imgPath'])

                idx += num_images

                self.mean_loss_val += loss.item()
                self.mean_acc_val += avg_acc.item()
                if self.use_tensorboard:
                    self.summary_writer.add_scalar(
                        'val_loss',
                        loss.item(),
                        global_step=step + self.epoch * self.len_dl_val)
                    self.summary_writer.add_scalar(
                        'val_acc',
                        avg_acc.item(),
                        global_step=step + self.epoch * self.len_dl_val)
                    if step == 0:
                        save_images(image,
                                    target,
                                    joints_target,
                                    output,
                                    joints_preds,
                                    joints_data['joints_visibility'],
                                    self.summary_writer,
                                    step=step + self.epoch * self.len_dl_val,
                                    prefix='test_')

        self.mean_loss_val /= len(self.dl_val)
        self.mean_acc_val /= len(self.dl_val)

        # COCO evaluation
        print('\nVal AP/AR')
        self.val_accs, self.mean_mAP_val = self.ds_val.evaluate_overall_accuracy(
            all_preds, all_boxes, image_paths, output_dir=self.log_path)
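
The all_boxes buffer filled above packs one row per image: columns 0-1 hold the person center (x, y), columns 2-3 the scale in units of pixel_std, column 4 the box area, and column 5 the detector score. A small worked example of a single row, with hypothetical values:

import numpy as np

pixel_std = 200
c = np.array([[320.0, 240.0]])   # hypothetical person center
s = np.array([[1.2, 1.6]])       # hypothetical scale
score = np.array([0.9])          # hypothetical detector confidence

box = np.zeros((1, 6), dtype=np.float32)
box[:, 0:2] = c                          # center (x, y)
box[:, 2:4] = s                          # scale
box[:, 4] = np.prod(s * pixel_std, 1)    # area: (1.2 * 200) * (1.6 * 200) = 76800
box[:, 5] = score                        # detection confidence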
Example #4
    def _predict_batch(self, image):
        with torch.no_grad():

            heatmaps_list = None
            tags_list = []

            # scales and base (size, center, scale)
            scales = (1, )  # ToDo add support to multiple scales

            scales = sorted(scales, reverse=True)
            base_size, base_center, base_scale = get_multi_scale_size(
                image[0], self.resolution, 1, 1)

            # for each scale (at the moment, just one scale)
            for idx, scale in enumerate(scales):
                # rescale image, convert to tensor, move to device
                images = list()
                for img in image:
                    # Keep the resized image under a separate name so the
                    # input batch `image` is not overwritten across scales.
                    img_resized, size_resized, _, _ = resize_align_multi_scale(
                        img,
                        self.resolution,
                        scale,
                        min(scales),
                        interpolation=self.interpolation)
                    img_tensor = self.transform(
                        cv2.cvtColor(img_resized,
                                     cv2.COLOR_BGR2RGB)).unsqueeze(dim=0)
                    images.append(img_tensor.to(self.device))
                images = torch.cat(images)

                # inference
                # output: list of HigherHRNet outputs (heatmaps)
                # avg_heatmaps: averaged heatmaps
                # tags: per-pixel identity ids.
                #       See Newell et al., Associative Embedding: End-to-End Learning for Joint Detection and
                #           Grouping, NIPS 2017. https://arxiv.org/abs/1611.05424 or
                #           http://papers.nips.cc/paper/6822-associative-embedding-end-to-end-learning-for-joint-detection-and-grouping
                outputs, heatmaps, tags = get_multi_stage_outputs(
                    self.model,
                    images,
                    with_flip=False,
                    project2image=True,
                    size_projected=size_resized,
                    nof_joints=self.nof_joints,
                    max_batch_size=self.max_batch_size)

                # aggregate the multiple heatmaps and tags
                heatmaps_list, tags_list = aggregate_results(
                    scale,
                    heatmaps_list,
                    tags_list,
                    heatmaps,
                    tags,
                    with_flip=False,
                    project2image=True)

            heatmaps = heatmaps_list.float() / len(scales)
            tags = torch.cat(tags_list, dim=4)

            # refine prediction
            # grouped has the shape (people, joints, 4) -> 4: (x, y, confidence, tag)
            # scores has the shape (people, ) and corresponds to the person confidence before refinement
            grouped, scores = self.output_parser.parse(
                heatmaps,
                tags,
                adjust=True,
                refine=True  # ToDo parametrize these two parameters
            )

            # get final predictions
            final_results = get_final_preds(
                grouped, base_center, base_scale,
                [heatmaps.shape[3], heatmaps.shape[2]])

            if self.filter_redundant_poses:
                # filter redundant poses - this step filters out poses whose joints have, on average, a difference
                #   lower than 3 pixels
                # this is useful when refine=True in self.output_parser.parse because that step joins together
                #   skeleton parts belonging to the same people (but then it does not remove redundant skeletons)
                final_pts = []
                # for each image
                for i in range(len(final_results)):
                    final_pts.insert(i, list())
                    # for each person
                    for pts in final_results[i]:
                        if len(final_pts[i]) > 0:
                            diff = np.mean(np.abs(
                                np.array(final_pts[i])[..., :2] -
                                pts[..., :2]),
                                           axis=(1, 2))
                            if np.any(
                                    diff < 3
                            ):  # average diff between this pose and another one is less than 3 pixels
                                continue
                        final_pts[i].append(pts)
                final_results = final_pts

            pts = []
            boxes = []
            for i in range(len(final_results)):
                pts.insert(i, np.asarray(final_results[i]))
                if len(pts[i]) > 0:
                    pts[i][..., [0, 1]] = pts[i][..., [
                        1, 0
                    ]]  # restoring (y, x) order as in SimpleHRNet
                    pts[i] = pts[i][..., :3]

                    if self.return_bounding_boxes:
                        left_top = np.min(pts[i][..., 0:2], axis=1)
                        right_bottom = np.max(pts[i][..., 0:2], axis=1)
                        # [x1, y1, x2, y2]
                        boxes.insert(
                            i,
                            np.stack([
                                left_top[:, 1], left_top[:, 0],
                                right_bottom[:, 1], right_bottom[:, 0]
                            ],
                                     axis=-1))
                else:
                    boxes.insert(i, [])

        res = list()
        if self.return_heatmaps:
            res.append(heatmaps)
        if self.return_bounding_boxes:
            res.append(boxes)
        res.append(pts)

        if len(res) > 1:
            return res
        else:
            return res[0]
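
The redundancy filter in the middle of _predict_batch can be read as a standalone routine: a pose is kept only if its joints differ, on average, by at least ~3 pixels from every pose already kept. A self-contained sketch of the same logic:

import numpy as np

def filter_redundant_poses(poses, min_mean_px_diff=3.0):
    # poses: iterable of (joints, 4) arrays -> (x, y, confidence, tag).
    kept = []
    for pts in poses:
        if kept:
            # Mean per-pose pixel distance to every pose kept so far.
            diff = np.mean(np.abs(np.array(kept)[..., :2] - pts[..., :2]),
                           axis=(1, 2))
            if np.any(diff < min_mean_px_diff):
                continue  # nearly identical to a pose we already kept
        kept.append(pts)
    return kept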
Example #5
    def _val(self):
        num_samples = len(self.ds_val)

        all_preds = np.zeros((num_samples, self.model_nof_joints, 3),
                             dtype=np.float32)
        all_boxes = np.zeros((num_samples, 6), dtype=np.float32)
        image_paths = []
        idx = 0
        self.model.eval()
        losses = AverageMeter()
        avg_accs = AverageMeter()
        pbar = tqdm(self.dl_val, ncols=170)

        for step, (image, target, target_weight,
                   joints_data) in enumerate(self.dl_val):
            image = image.cuda()
            target = target.cuda()
            target_weight = target_weight.cuda()

            output = self.model(image)

            if self.flip_test_images:
                image_flipped = flip_tensor(image, dim=-1)
                output_flipped = self.model(image_flipped)

                output_flipped = flip_back(output_flipped,
                                           self.ds_val.flip_pairs)

                output = (output + output_flipped) * 0.5

            loss = self.loss_fn(output, target, target_weight)

            # Evaluate accuracy
            # Get predictions on the resized images (given as input)
            accs, avg_acc, cnt, joints_preds, joints_target = \
                self.ds_train.evaluate_accuracy(output, target)

            losses.update(loss.item())
            avg_accs.update(avg_acc.item())

            # Number of images in the current batch
            num_images = image.shape[0]

            log = f'[Epoch {self.epoch}] '
            log += f'Valid loss : {loss.item():.4f}({losses.avg:.4f}) '
            log += f'Valid acc : {avg_acc.item():.4f}({avg_accs.avg:.4f}) '
            pbar.set_description(log)
            pbar.update()

            # Per-sample center, scale, and detector score from the annotations
            c = joints_data['center'].numpy()
            s = joints_data['scale'].numpy()
            score = joints_data['score'].numpy()
            pixel_std = 200  # ToDo Parametrize this

            preds, maxvals = get_final_preds(
                True, output, c, s,
                pixel_std)  # ToDo check what post_processing exactly does

            all_preds[idx:idx + num_images, :,
                      0:2] = preds[:, :, 0:2].detach().cpu().numpy()
            all_preds[idx:idx + num_images, :,
                      2:3] = maxvals.detach().cpu().numpy()
            # ToDo double-check the all_boxes layout
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * pixel_std, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_paths.extend(joints_data['imgPath'])

            idx += num_images

            self.mean_loss_val += loss.item()
            self.mean_acc_val += avg_acc.item()
            if self.use_tensorboard:
                self.summary_writer.add_scalar('Valid/Loss',
                                               loss.item(),
                                               global_step=step +
                                               self.epoch * self.len_dl_val)
                self.summary_writer.add_scalar('Valid/Accuracy',
                                               avg_acc.item(),
                                               global_step=step +
                                               self.epoch * self.len_dl_val)
                if step == 0:
                    save_images(image,
                                target,
                                joints_target,
                                output,
                                joints_preds,
                                joints_data['joints_visibility'],
                                self.summary_writer,
                                step=step + self.epoch * self.len_dl_val,
                                prefix='test_')

        self.mean_loss_val /= len(self.dl_val)
        self.mean_acc_val /= len(self.dl_val)

        # COCO evaluation
        # print('\nVal AP/AR')
        self.val_accs, self.mean_mAP_val = self.ds_val.evaluate_overall_accuracy(
            all_preds, all_boxes, image_paths, output_dir=self.log_path)

        mean_mAP = self.val_accs[
            'AP']  # Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ]
        AP_5 = self.val_accs[
            'Ap .5']  # Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ]
        AP_75 = self.val_accs[
            'AP .75']  # Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ]
        mean_mAR = self.val_accs[
            'AR']  # Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ]
        AR_5 = self.val_accs[
            'AR .5']  # Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ]
        AR_75 = self.val_accs[
            'AR .75']  # Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ]

        log = f'[EPOCH {self.epoch}] Valid Loss : {losses.avg:.4f}, '
        log += f'Valid acc : {avg_accs.avg:.4f}, '
        log += f'AP : {mean_mAP:.4f}, '
        log += f'AP.5 : {AP_5:.4f}, '
        log += f'AP.75 : {AP_75:.4f}, '
        log += f'AR : {mean_mAR:.4f}, '
        pbar.set_description(log)
        pbar.close()

        if self.use_tensorboard:
            self.summary_writer.add_scalar('Valid/mean_mAP',
                                           mean_mAP,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
            self.summary_writer.add_scalar('Valid/AP.5',
                                           AP_5,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
            self.summary_writer.add_scalar('Valid/AP.75',
                                           AP_75,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
            self.summary_writer.add_scalar('Valid/mean_mAR',
                                           mean_mAR,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
            self.summary_writer.add_scalar('Valid/AR.5',
                                           AR_5,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
            self.summary_writer.add_scalar('Valid/AR.75',
                                           AR_75,
                                           global_step=step +
                                           self.epoch * self.len_dl_val)
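
AverageMeter is defined elsewhere in this project; a common implementation matching the update() / .avg usage above (an assumption, not necessarily the project's exact code):

class AverageMeter:
    # Tracks the latest value and a running average.
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = float(val)
        self.sum += float(val) * n
        self.count += n
        self.avg = self.sum / self.count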