Code example #1
    def _get_synth_weights(self, ids):
        kpts = self.datadict["keypoints"][ids]
        img_paths = self.datadict["img_paths"][ids]
        weight_maps = []
        for kps, p in zip(kpts, img_paths):
            # imagesize.get returns the size as (width, height)
            img_shape = imagesize.get(p)
            scale_x = float(self.spatial_size) / img_shape[0]
            scale_y = float(self.spatial_size) / img_shape[1]

            kps_rescaled = np.multiply(
                kps[:, :2], np.asarray([scale_x, scale_y]), dtype=np.float32
            )

            bb = get_bounding_box(kps_rescaled, img_shape)["bbox"]
            weight_map = np.ones(
                shape=[self.spatial_size, self.spatial_size, 1], dtype=np.float64
            )
            # bbox is laid out as [x_min, x_max, y_min, y_max]; clamp it to the canvas
            weight_map[
                max(0, bb[2]) : min(self.spatial_size, bb[3]),
                max(0, bb[0]) : min(self.spatial_size, bb[1]),
            ] = self.synth_weights
            weight_map = cv2.GaussianBlur(weight_map, (9, 9), sigmaX=3.0)
            # GaussianBlur drops the trailing channel dimension, so add a
            # leading channel axis back before converting to a tensor
            weight_maps.append(torch.tensor(np.expand_dims(weight_map, axis=0)))

        return torch.stack(weight_maps, dim=0).squeeze(dim=0)
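
All five snippets rely on a get_bounding_box helper whose implementation is not shown here. Judging from the call sites (the slice in code example #1 and the kps - [bbx[0], bbx[2]] shift in code example #3), it returns a dict whose "bbox" entry is laid out as [x_min, x_max, y_min, y_max] and whose "pads" entry holds per-side padding amounts in the (top, bottom, left, right) order expected by cv2.copyMakeBorder. The sketch below only illustrates that assumed contract; it is not the original implementation and omits details such as enlarging the box or shifting it into the padded image's coordinates.

import numpy as np

def get_bounding_box_sketch(kps, img_shape):
    # keep keypoints that were actually detected (non-zero coordinates)
    valid = kps[np.any(kps[:, :2] > 0, axis=1), :2]
    x_min, y_min = np.floor(valid.min(axis=0)).astype(int)
    x_max, y_max = np.ceil(valid.max(axis=0)).astype(int)
    h, w = img_shape[0], img_shape[1]
    # per-side overshoot of the box beyond the image, ordered (top, bottom, left, right)
    pads = np.array(
        [max(0, -y_min), max(0, y_max - h), max(0, -x_min), max(0, x_max - w)]
    )
    return {"bbox": np.array([x_min, x_max, y_min, y_max]), "pads": pads}
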
Code example #2
    def _get_pose_img(self, ids, use_crops, use_complete_ddict=False):
        if use_complete_ddict:
            assert not use_crops
            img_paths = self.complete_datadict["img_paths"][ids]
        else:
            img_paths = self.datadict["img_paths"][ids]
        prep_img = []
        if use_crops:
            keypoints = self.datadict["keypoints"][ids]
            for (p, kps) in zip(img_paths, keypoints):
                pimg = cv2.imread(p)
                pimg = cv2.cvtColor(pimg, cv2.COLOR_BGR2RGB)

                crop_dict = get_bounding_box(kps, pimg.shape)
                cr_box = crop_dict["bbox"]

                if np.any(crop_dict["pads"] > 0):
                    pimg = cv2.copyMakeBorder(
                        pimg,
                        crop_dict["pads"][0],
                        crop_dict["pads"][1],
                        crop_dict["pads"][2],
                        crop_dict["pads"][3],
                        borderType=cv2.BORDER_REFLECT,
                    )
                # crop to the person box: rows are y (bb[2]:bb[3]), columns are x (bb[0]:bb[1])
                pimg = pimg[cr_box[2] : cr_box[3], cr_box[0] : cr_box[1]]

                prep_img.append(self.transforms(pimg))
        else:
            for p in img_paths:
                pimg = cv2.imread(p)
                pimg = cv2.cvtColor(pimg, cv2.COLOR_BGR2RGB)
                prep_img.append(self.transforms(pimg))

        return torch.stack(prep_img, dim=0).squeeze()
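
The pad-and-crop block in the loop above reappears verbatim in _get_app_img (code example #5). A hypothetical refactoring that pulls it into a small helper of the same class could look like this; the method name is an assumption, and it uses the same get_bounding_box, cv2 and np names as the original code:

    def _crop_around_keypoints(self, img, kps):
        # hypothetical helper, not part of the original class
        crop_dict = get_bounding_box(kps, img.shape)
        x_min, x_max, y_min, y_max = crop_dict["bbox"]
        if np.any(crop_dict["pads"] > 0):
            # reflect-pad so the crop box never reaches outside the image
            img = cv2.copyMakeBorder(
                img,
                crop_dict["pads"][0],
                crop_dict["pads"][1],
                crop_dict["pads"][2],
                crop_dict["pads"][3],
                borderType=cv2.BORDER_REFLECT,
            )
        return img[y_min:y_max, x_min:x_max]
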
Code example #3
    def _get_stickman(self, ids, sscale=None):
        kpts = self.datadict["keypoints"][ids]
        img_paths = self.datadict["img_paths"][ids]
        if "img_size" in self.datadict and self.datadict["img_size"].size > 0:
            img_shapes = self.datadict["img_size"][ids]
        else:
            img_shapes = None

        stickmans = []

        for i, (kps, p) in enumerate(zip(kpts, img_paths)):
            # note: imagesize.get returns the size as (width, height)
            if img_shapes is not None:
                img_shape = [img_shapes[i][1], img_shapes[i][0]]
            else:
                img_shape = imagesize.get(p)
            if self.use_crops:

                crop_dict = get_bounding_box(kps, [img_shape[1], img_shape[0]])
                bbx = crop_dict["bbox"]
                # new coordinate origin
                kps_rescaled = kps - np.asarray(
                    [bbx[0], bbx[2]], dtype=np.float64
                )
                # scale to desired img size
                scale_x = float(self.spatial_size) / np.abs(bbx[1] - bbx[0])
                scale_y = float(self.spatial_size) / np.abs(bbx[3] - bbx[2])

                kps_rescaled = np.multiply(
                    kps_rescaled[:, :2],
                    np.asarray([scale_x, scale_y], dtype=np.float64),
                )

            else:
                scale_x = float(self.spatial_size) / img_shape[0]
                scale_y = float(self.spatial_size) / img_shape[1]

                kps_rescaled = np.multiply(
                    kps[:, :2], np.asarray([scale_x, scale_y], dtype=np.float64)
                )
            stickman = make_joint_img(
                [self.spatial_size, self.spatial_size],
                kps_rescaled,
                self.joint_model,
                line_colors=self.line_colors,
                scale_factor=self.stickman_scale if sscale is None else sscale,
            )
            if np.all(stickman == 0):
                # warn if no joints could be drawn for this sample
                print(f"Warning: stickman image for {p} is all zeros")
            stickmans.append(self.stickman_transforms(stickman))

        return torch.stack(stickmans, dim=0).squeeze()
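
make_joint_img is defined elsewhere in the repository; from the way it is called, it renders the rescaled keypoints as a coloured limb skeleton on a blank spatial_size x spatial_size canvas. The sketch below only illustrates that idea under assumptions: the limbs connectivity list, the thickness rule derived from scale_factor, and the zero-keypoint check are guesses, not the original implementation.

import cv2
import numpy as np

def make_joint_img_sketch(img_size, kps, limbs, line_colors, scale_factor=1.0):
    # illustrative sketch only; the real make_joint_img lives elsewhere
    canvas = np.zeros((img_size[0], img_size[1], 3), dtype=np.uint8)
    thickness = max(1, int(round(3 * scale_factor)))
    for (a, b), color in zip(limbs, line_colors):
        pa, pb = kps[a], kps[b]
        # skip limbs whose endpoints were not detected (all-zero keypoints)
        if np.all(pa <= 0) or np.all(pb <= 0):
            continue
        pt_a = tuple(int(v) for v in np.round(pa))
        pt_b = tuple(int(v) for v in np.round(pb))
        cv2.line(canvas, pt_a, pt_b, color, thickness)
    return canvas
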
Code example #4
    def _get_keypoints(self, ids, use_map_ids=False):
        kpts = []
        if use_map_ids:
            ids = self._sample_valid_seq_ids(
                [self.datadict["map_ids"][ids[0]], len(ids) - 1]
            )
        if self.use_crops:
            for id in ids:
                kps = self.datadict["keypoints"][
                    id, self.joint_model.kps_to_use, :2
                ]
                imsize = imagesize.get(self.datadict["img_paths"][id])
                crop_dict = get_bounding_box(kps, [imsize[1], imsize[0]])
                bbx = crop_dict["bbox"]

                kps_rescaled = kps - np.asarray(
                    [bbx[0], bbx[2]], dtype=np.float64
                )
                scale_x = 1.0 / np.abs(bbx[1] - bbx[0])
                scale_y = 1.0 / np.abs(bbx[3] - bbx[2])

                kps_rescaled = np.multiply(
                    kps_rescaled[:, :2],
                    np.asarray([scale_x, scale_y], dtype=np.float64),
                )

                kpts.append(kps_rescaled)
        else:
            key = "norm_keypoints"
            for id in ids:
                kps = self.datadict[key][id, self.joint_model.kps_to_use, :2]
                kps = np.clip(kps, a_min=0.0, a_max=1.0)

                kpts.append(kps)

        kpts = np.stack(kpts, axis=0).squeeze()

        return kpts
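
The keypoints returned above are (approximately) normalized to [0, 1], either relative to the crop box or via the precomputed norm_keypoints, so projecting them back onto the spatial_size x spatial_size canvas, e.g. to overlay them on the stickman image, is a single multiplication. A hypothetical usage snippet, with dataset and ids as stand-in names:

kps_norm = dataset._get_keypoints(ids)    # values in [0, 1]; squeezed if a single id is given
kps_px = kps_norm * dataset.spatial_size  # pixel coordinates on the stickman canvas
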
Code example #5
    def _get_app_img(self, ids, inplane_norm, use_complete_ddict=False):
        # this part always uses crops
        if use_complete_ddict and self.complete_datadict is not None:
            ddict = self.complete_datadict
        else:
            ddict = self.datadict
        if not isinstance(ids, Iterable):
            ids = [ids]
        app_paths = ddict["img_paths"][ddict["map_ids"][ids]]
        if not isinstance(app_paths, np.ndarray):
            app_paths = [app_paths]
        prep_imgs = []
        if inplane_norm:
            kpts = ddict["keypoints"][ddict["map_ids"][ids]]
            for p, kps in zip(app_paths, kpts):
                orig_img = cv2.imread(p)
                orig_img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
                # original height
                # oh = orig_img.shape[0]
                oh = self.spatial_size
                # target heights and widths
                # hw = [s // 2** self.box_factor for s in orig_img.shape[:2]]
                # wh = list(reversed(hw))
                hw = (
                    self.spatial_size // 2 ** self.box_factor,
                    self.spatial_size // 2 ** self.box_factor,
                )
                wh = hw
                part_imgs = []
                for t in self.joint_model.norm_T:
                    # get transformation
                    T = t(kps, jm=self.joint_model, wh=wh, oh=oh)
                    if T is not None:
                        part_img = cv2.warpPerspective(
                            orig_img, T, hw, borderMode=cv2.BORDER_REPLICATE
                        )
                    else:
                        part_img = np.zeros((hw[0], hw[1], 3), dtype=np.uint8)
                    part_imgs.append(self.stickman_transforms(part_img))

                # part_imgs are already CHW tensors, so concatenate them along the channel axis
                pimg = torch.cat(part_imgs, dim=0)
                prep_imgs.append(pimg)

        else:
            # use image cropped around the keypoints of the specific person
            if self.use_crops_for_app:
                kpts = ddict["keypoints"][ddict["map_ids"][ids]]
                for p, kps in zip(app_paths, kpts):
                    pimg = cv2.imread(p)
                    pimg = cv2.cvtColor(pimg, cv2.COLOR_BGR2RGB)

                    crop_dict = get_bounding_box(kps, pimg.shape)
                    cr_box = crop_dict["bbox"]

                    if np.any(crop_dict["pads"] > 0):
                        pimg = cv2.copyMakeBorder(
                            pimg,
                            crop_dict["pads"][0],
                            crop_dict["pads"][1],
                            crop_dict["pads"][2],
                            crop_dict["pads"][3],
                            borderType=cv2.BORDER_REFLECT,
                        )
                    pimg = pimg[cr_box[2] : cr_box[3], cr_box[0] : cr_box[1]]
                    prep_imgs.append(self.transforms(pimg))
            else:
                for p in app_paths:
                    pimg = cv2.imread(p)
                    pimg = cv2.cvtColor(pimg, cv2.COLOR_BGR2RGB)
                    prep_imgs.append(self.transforms(pimg))

        return torch.stack(prep_imgs, dim=0).squeeze()
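
A hypothetical sketch of how the getters above might be combined in the dataset's __getitem__. The sample keys, the single-frame id list and the inplane_norm value are assumptions, not taken from the original code:

    def __getitem__(self, idx):
        # hypothetical composition of the getters shown in code examples #1-#5
        ids = [idx]
        return {
            "pose_img": self._get_pose_img(ids, use_crops=self.use_crops),
            "stickman": self._get_stickman(ids),
            "keypoints": self._get_keypoints(ids),
            "app_img": self._get_app_img(ids, inplane_norm=False),
        }
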