Example 1
    def preprocess_fn(self, img, num_objects, keypoints, bboxes, category_id):
        """image pre-process and augmentation"""
        num_objs = min(num_objects, self.data_opt.max_objs)
        img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        width = img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.data_opt.rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            h_border = self._get_border(self.data_opt.input_res[0], img.shape[0])
            w_border = self._get_border(self.data_opt.input_res[1], img.shape[1])
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        else:
            sf = self.data_opt.scale
            cf = self.data_opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.data_opt.aug_rot:
            rf = self.data_opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        if np.random.random() < self.data_opt.flip_prop:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, rot, self.data_opt.input_res)
        inp = cv2.warpAffine(img, trans_input, (self.data_opt.input_res[0], self.data_opt.input_res[1]),
                             flags=cv2.INTER_LINEAR)
        if self.run_mode == "train" and self.data_opt.color_aug:
            # color_aug distorts the image in place and expects floats in [0, 1];
            # convert first, then scale back so the on-device normalization below still applies
            inp = inp.astype(np.float32) / 255.
            color_aug(self._data_rng, inp, self.data_opt.eig_val, self.data_opt.eig_vec)
            inp *= 255.

        # caution: image normalization and transpose to nchw will both be done on device
        # inp = (inp.astype(np.float32) / 255. - self.data_opt.mean) / self.data_opt.std
        # inp = inp.transpose(2, 0, 1)

        if self.data_opt.output_res[0] != self.data_opt.output_res[1]:
            raise ValueError("Only square output resolutions are supported for convenience")

        output_res = self.data_opt.output_res[0]
        num_joints = self.data_opt.num_joints
        max_objs = self.data_opt.max_objs
        num_classes = self.data_opt.num_classes

        trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])

        hm = np.zeros((num_classes, output_res, output_res), dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res), dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        kps = np.zeros((max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        ind = np.zeros((max_objs), dtype=np.int32)
        reg_mask = np.zeros((max_objs), dtype=np.int32)
        kps_mask = np.zeros((max_objs, num_joints * 2), dtype=np.int32)
        hp_offset = np.zeros((max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((max_objs * num_joints), dtype=np.int32)
        hp_mask = np.zeros((max_objs * num_joints), dtype=np.int32)

        draw_gaussian = draw_msra_gaussian if self.net_opt.mse_loss else draw_umich_gaussian
        ground_truth = []
        for k in range(num_objs):
            bbox = self._coco_box_to_bbox(bboxes[k])
            cls_id = int(category_id[k]) - 1
            pts = np.array(keypoints[k], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1  # -1 because pixel indices start at zero
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.data_opt.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

            lt = [bbox[0], bbox[3]]
            rb = [bbox[2], bbox[1]]
            bbox[:2] = affine_transform(bbox[:2], trans_output_rot)
            bbox[2:] = affine_transform(bbox[2:], trans_output_rot)
            if rot != 0:
                lt = affine_transform(lt, trans_output_rot)
                rb = affine_transform(rb, trans_output_rot)
                bbox[0] = min(lt[0], rb[0], bbox[0], bbox[2])
                bbox[2] = max(lt[0], rb[0], bbox[0], bbox[2])
                bbox[1] = min(lt[1], rb[1], bbox[1], bbox[3])
                bbox[3] = max(lt[1], rb[1], bbox[1], bbox[3])
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h <= 0 or w <= 0:
                continue
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))  # the draw helpers slice arrays with the radius, so it must be an int
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            hp_radius = radius
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2: j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.net_opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j], hm[cls_id], ct_int, pts[j, :2] - ct_int,
                                           radius, is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)

            if self.enable_visual_image:
                gt = {
                    "category_id": int(cls_id + 1),
                    "bbox": [ct[0] - w / 2, ct[1] - h / 2, w, h],
                    "score": float("{:.2f}".format(1)),
                    "keypoints": pts.reshape(num_joints * 3).tolist(),
                }
                ground_truth.append(gt)
        ret = (inp, hm, reg_mask, ind, wh)
        if self.net_opt.dense_hp:
            dense_kps = dense_kps.reshape((num_joints * 2, output_res, output_res))
            dense_kps_mask = dense_kps_mask.reshape((num_joints, 1, output_res, output_res))
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
            dense_kps_mask = dense_kps_mask.reshape((num_joints * 2, output_res, output_res))
            ret += (dense_kps, dense_kps_mask)
        else:
            ret += (kps, kps_mask)

        ret += (reg, hm_hp, hp_offset, hp_ind, hp_mask)
        if self.enable_visual_image:
            out_img = cv2.warpAffine(img, trans_output_rot, (output_res, output_res), flags=cv2.INTER_LINEAR)
            visual_image(out_img, ground_truth, self.save_path, ratio=self.data_opt.input_res[0] // output_res)
        return ret
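
The heatmap stamping above relies on gaussian_radius and draw_umich_gaussian, which this listing does not show. Below is a minimal sketch of the standard CornerNet/CenterNet versions of the drawing helpers, an assumption about what this repository uses; note that draw_umich_gaussian slices arrays with the radius, which is why the integer conversion above matters.

import numpy as np

def gaussian2D(shape, sigma=1):
    """dense 2-D gaussian patch used to stamp a heatmap peak"""
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    """stamp a gaussian peak at `center`, keeping the element-wise maximum"""
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap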
Example 2
    def preprocess_fn(self, img, num_objects, keypoints, bboxes, category_id):
        """image pre-process and augmentation"""
        num_objs = min(num_objects, self.data_opt.max_objs)
        img, width, c, s, rot, flipped = self.get_aug_param(img)

        trans_input = get_affine_transform(c, s, rot, self.data_opt.input_res)
        inp = cv2.warpAffine(
            img,
            trans_input,
            (self.data_opt.input_res[0], self.data_opt.input_res[1]),
            flags=cv2.INTER_LINEAR)

        assert self.data_opt.output_res[0] == self.data_opt.output_res[1]
        output_res = self.data_opt.output_res[0]
        num_joints = self.data_opt.num_joints
        max_objs = self.data_opt.max_objs
        num_classes = self.data_opt.num_classes

        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])

        hm = np.zeros((num_classes, output_res, output_res), dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        kps = np.zeros((max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        ind = np.zeros((max_objs), dtype=np.int32)
        reg_mask = np.zeros((max_objs), dtype=np.int32)
        kps_mask = np.zeros((max_objs, num_joints * 2), dtype=np.int32)
        hp_offset = np.zeros((max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((max_objs * num_joints), dtype=np.int32)
        hp_mask = np.zeros((max_objs * num_joints), dtype=np.int32)

        draw_gaussian = draw_msra_gaussian if self.net_opt.mse_loss else draw_umich_gaussian
        ground_truth = []
        for k in range(num_objs):
            bbox = self._coco_box_to_bbox(bboxes[k])
            cls_id = int(category_id[k]) - 1
            pts = np.array(keypoints[k], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1  # -1 because pixel indices start at zero
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.data_opt.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

            lt, rb = [bbox[0], bbox[3]], [bbox[2], bbox[1]]
            bbox[:2] = affine_transform(bbox[:2], trans_output_rot)
            bbox[2:] = affine_transform(bbox[2:], trans_output_rot)
            if rot != 0:
                lt = affine_transform(lt, trans_output_rot)
                rb = affine_transform(rb, trans_output_rot)
                for i in range(2):
                    bbox[i] = min(lt[i], rb[i], bbox[i], bbox[i + 2])
                    bbox[i + 2] = max(lt[i], rb[i], bbox[i], bbox[i + 2])
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h <= 0 or w <= 0:
                continue
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            hp_radius = radius = max(0, int(radius))  # the draw helpers expect an int radius
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_res + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            num_kpts = pts[:, 2].sum()
            if num_kpts == 0:
                hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                reg_mask[k] = 0

            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[k, j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                        hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[k * num_joints + j] = 1
                        if self.net_opt.dense_hp:
                            # must be before draw center hm gaussian
                            draw_dense_reg(dense_kps[j],
                                           hm[cls_id],
                                           ct_int,
                                           pts[j, :2] - ct_int,
                                           radius,
                                           is_offset=True)
                            draw_gaussian(dense_kps_mask[j], ct_int, radius)
                        draw_gaussian(hm_hp[j], pt_int, hp_radius)
            draw_gaussian(hm[cls_id], ct_int, radius)

            if self.enable_visual_image:
                gt = {
                    "category_id": int(cls_id + 1),
                    "bbox": [ct[0] - w / 2, ct[1] - h / 2, w, h],
                    "score": float("{:.2f}".format(1)),
                    "keypoints": pts.reshape(num_joints * 3).tolist(),
                }
                ground_truth.append(gt)
        ret = (inp, hm, reg_mask, ind, wh)
        if self.net_opt.dense_hp:
            dense_kps = dense_kps.reshape(
                (num_joints * 2, output_res, output_res))
            dense_kps_mask = dense_kps_mask.reshape(
                (num_joints, 1, output_res, output_res))
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(
                (num_joints * 2, output_res, output_res))
            ret += (dense_kps, dense_kps_mask)
        else:
            ret += (kps, kps_mask)

        ret += (reg, hm_hp, hp_offset, hp_ind, hp_mask)
        if self.enable_visual_image:
            out_img = cv2.warpAffine(img,
                                     trans_output_rot,
                                     (output_res, output_res),
                                     flags=cv2.INTER_LINEAR)
            visual_image(out_img,
                         ground_truth,
                         self.save_path,
                         ratio=self.data_opt.input_res[0] // output_res)
        return ret
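
Example 2 assumes a get_aug_param helper that Example 1 inlines. A plausible reconstruction of that method, assembled directly from Example 1's augmentation block (the repository's actual helper may differ in detail):

    def get_aug_param(self, img):
        """decode the image and draw random crop/scale/rotate/flip parameters"""
        img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        width = img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0
        flipped = False
        if self.data_opt.rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            h_border = self._get_border(self.data_opt.input_res[0], img.shape[0])
            w_border = self._get_border(self.data_opt.input_res[1], img.shape[1])
            c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        else:
            sf, cf = self.data_opt.scale, self.data_opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        if np.random.random() < self.data_opt.aug_rot:
            rf = self.data_opt.rotate
            rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
        if np.random.random() < self.data_opt.flip_prop:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1
        return img, width, c, s, rot, flipped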
Example 3
    def pre_process_for_test(self, image, img_id, scale):
        """image pre-process for evaluation"""
        b, h, w, ch = image.shape
        assert b == 1, "only a single image is supported here"
        image = image.reshape((h, w, ch))
        height, width = image.shape[0:2]
        new_height = int(height * scale)
        new_width = int(width * scale)
        if self.keep_res:
            inp_height = (new_height | self.pad) + 1
            inp_width = (new_width | self.pad) + 1
            c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            s = np.array([inp_width, inp_height], dtype=np.float32)
        else:
            inp_height, inp_width = self.data_opt.input_res[0], self.data_opt.input_res[1]
            c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            s = max(height, width) * 1.0

        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
        resized_image = cv2.resize(image, (new_width, new_height))
        inp_image = cv2.warpAffine(resized_image, trans_input, (inp_width, inp_height),
                                   flags=cv2.INTER_LINEAR)
        inp_img = (inp_image.astype(np.float32) / 255. - self.data_opt.mean) / self.data_opt.std

        eval_image = inp_img.reshape((1,) + inp_img.shape)
        eval_image = eval_image.transpose(0, 3, 1, 2)

        meta = {'c': c, 's': s,
                'out_height': inp_height // self.net_opt.down_ratio,
                'out_width': inp_width // self.net_opt.down_ratio}

        if self.enable_visual_image:
            if self.run_mode != "test":
                annos = self.coco.loadAnns(self.anns[img_id])
                num_objs = min(len(annos), self.data_opt.max_objs)
                num_joints = self.data_opt.num_joints
                ground_truth = []
                for k in range(num_objs):
                    ann = annos[k]
                    bbox = self._coco_box_to_bbox(ann['bbox']) * scale
                    cls_id = int(ann['category_id']) - 1
                    pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
                    bbox[:2] = affine_transform(bbox[:2], trans_input)
                    bbox[2:] = affine_transform(bbox[2:], trans_input)
                    bbox[0::2] = np.clip(bbox[0::2], 0, inp_width - 1)
                    bbox[1::2] = np.clip(bbox[1::2], 0, inp_height - 1)
                    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
                    if h <= 0 or w <= 0:
                        continue
                    for j in range(num_joints):
                        if pts[j, 2] > 0:
                            pts[j, :2] = affine_transform(pts[j, :2] * scale, trans_input)
                    bbox = [bbox[0], bbox[1], w, h]
                    gt = {
                        "image_id": int(img_id),
                        "category_id": int(cls_id + 1),
                        "bbox": bbox,
                        "score": float("{:.2f}".format(1)),
                        "keypoints": pts.reshape(num_joints * 3).tolist(),
                        "id": self.anns[img_id][k]
                    }
                    ground_truth.append(gt)
                visual_image(inp_image, ground_truth, self.save_path, height=inp_height, width=inp_width,
                             name="_scale" + str(scale))
            else:
                image_name = "gt_" + self.run_mode + "_image_" + str(img_id) + "_scale_" + str(scale) + ".png"
                cv2.imwrite("{}/{}".format(self.save_path, image_name), inp_image)

        return eval_image, meta
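
For context, here is a hypothetical multi-scale evaluation driver showing how pre_process_for_test and its meta dictionary are typically consumed. dataset, network, post_process, and merge_outputs are placeholder names for illustration, not identifiers from the source:

for img_id in dataset.image_ids:
    image = dataset.read_image(img_id)               # shape (1, H, W, 3)
    detections = []
    for scale in (0.5, 0.75, 1.0, 1.25, 1.5):
        eval_image, meta = dataset.pre_process_for_test(image, img_id, scale)
        output = network(eval_image)                 # NCHW float32 input
        # raw detections live on the (out_height, out_width) grid;
        # meta['c'] and meta['s'] invert the affine transform back to
        # original-image coordinates during post-processing
        detections.append(post_process(output, meta, scale))
    results = merge_outputs(detections)              # e.g. NMS across scales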
Example 4
    def preprocess_fn(self, image, num_objects, bboxes, category_id):
        """image pre-process and augmentation"""
        num_objs = min(num_objects, self.data_opt.max_objs)
        img = cv2.imdecode(image, cv2.IMREAD_COLOR)
        height = img.shape[0]
        width = img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(height, width) * 1.0
        input_h, input_w = self.data_opt.input_res[0], self.data_opt.input_res[1]
        rot = 0

        flipped = False
        if self.data_opt.rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            h_border = self._get_border(128, img.shape[0])
            w_border = self._get_border(128, img.shape[1])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.data_opt.scale
            cf = self.data_opt.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.data_opt.flip_prop:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, rot, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.run_mode == "train" and self.data_opt.color_aug:
            color_aug(self._data_rng, inp, self.data_opt.eig_val,
                      self.data_opt.eig_vec)

        if self.data_opt.output_res[0] != self.data_opt.output_res[1]:
            raise ValueError(
                "Only square output resolutions are supported for convenience")

        output_h = input_h // self.data_opt.down_ratio
        output_w = input_w // self.data_opt.down_ratio
        max_objs = self.data_opt.max_objs
        num_classes = self.data_opt.num_classes
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        ind = np.zeros((max_objs), dtype=np.int32)
        reg_mask = np.zeros((max_objs), dtype=np.int32)
        cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.int32)

        draw_gaussian = draw_msra_gaussian if self.net_opt.mse_loss else draw_umich_gaussian

        ground_truth = []
        for k in range(num_objs):
            bbox = bboxes[k]
            cls_id = category_id[k] - 1
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1  # -1 because pixel indices start at zero
            bbox[:2] = affine_transform(bbox[:2], trans_output_rot)
            bbox[2:] = affine_transform(bbox[2:], trans_output_rot)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h <= 0 or w <= 0:  # skip degenerate boxes (the keypoint variants above do the same)
                continue
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1

            if self.net_opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            ground_truth.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1,
                cls_id
            ])
        ret = (inp, hm, reg_mask, ind, wh)
        if self.net_opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)  # keepdims so the concat below yields shape (2, H, W)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret += (dense_wh, dense_wh_mask)
        elif self.net_opt.cat_spec_wh:
            ret += (cat_spec_wh, cat_spec_mask)
        if self.net_opt.reg_offset:
            ret += (reg, )
        return ret
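
All four examples call _coco_box_to_bbox and _get_border without showing them. A minimal sketch matching the upstream CenterNet utilities, which these methods presumably mirror:

    def _coco_box_to_bbox(self, box):
        """convert a COCO [x, y, w, h] box to [x1, y1, x2, y2]"""
        return np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                        dtype=np.float32)

    def _get_border(self, border, size):
        """shrink the requested crop border until a valid random-center range remains"""
        i = 1
        while size - border // i <= border // i:
            i *= 2
        return border // i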