Esempio n. 1
0
    def _get_pre_dets(self, anns, trans_input, trans_output):
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        down_ratio = self.opt.down_ratio
        trans = trans_input
        reutrn_hm = self.opt.pre_hm
        pre_hm = np.zeros(
            (1, hm_h, hm_w), dtype=np.float32) if reutrn_hm else None
        pre_cts, track_ids = [], []
        for ann in anns:
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id > self.opt.num_classes or cls_id <= -99 or \
               ('iscrowd' in ann and ann['iscrowd'] > 0):
                continue
            bbox = self._coco_box_to_bbox(ann['bbox'])

            bbox[:2] = affine_transform(bbox[:2], trans)
            bbox[2:] = affine_transform(bbox[2:], trans)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            max_rad = 1

            if (h > 0 and w > 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                max_rad = max(max_rad, radius)

                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct0 = ct.copy()
                conf = 1

                ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
                ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
                conf = 1 if np.random.random() > self.opt.lost_disturb else 0

                ct_int = ct.astype(np.int32)
                # if conf == 0:
                if conf == 1:
                    pre_cts.append(ct / down_ratio)
                else:
                    pre_cts.append(ct0 / down_ratio)

                track_ids.append(ann['track_id'] if 'track_id' in ann else -1)
                if reutrn_hm:
                    draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

                if np.random.random() < self.opt.fp_disturb and reutrn_hm:
                    ct2 = ct0.copy()
                    # Hard code heatmap disturb ratio, haven't tried other numbers.
                    ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                    ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                    ct2_int = ct2.astype(np.int32)
                    draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)

        return pre_hm, pre_cts, track_ids
Esempio n. 2
0
    def _gen_kmf_att_hm(self, ret, pre_anns, trans_input):
        trackers = {}
        trans = trans_input
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        iid = None
        for idx, anns in enumerate(pre_anns):  #[..., n-2, n-1]
            for i, ann in enumerate(anns):
                cls_id = int(self.cat_ids[ann['category_id']])
                if cls_id > self.opt.num_classes or cls_id <= -999 or cls_id == 0:
                    continue
                if 'bbox' not in anns[i].keys():
                    ann['bbox'] = mask_utils.toBbox(ann['segmentation'])
                bbox = self._coco_box_to_bbox(ann['bbox'])
                bbox[:2] = affine_transform(bbox[:2], trans)
                bbox[2:] = affine_transform(bbox[2:], trans)
                bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
                bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
                h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
                if h <= 0 or w <= 0:
                    continue

                if ann['track_id'] not in trackers:
                    trackers[ann['track_id']] = {}
                    trackers[ann['track_id']]['kmf'] = KalmanBoxTracker(bbox)
                    trackers[ann['track_id']]['age'] = 0
                    trackers[ann['track_id']]['cts_history'] = [
                        np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                    ]
                else:
                    if np.random.random(
                    ) > self.opt.att_track_lost_disturb or idx == len(
                            pre_anns) - 1:
                        trackers[ann['track_id']]['kmf'].predict()
                        trackers[ann['track_id']]['kmf'].update(bbox)
                        trackers[ann['track_id']]['age'] += 1
                        trackers[ann['track_id']]['cts_history'].append(
                            np.array([(bbox[0] + bbox[2]) / 2,
                                      (bbox[1] + bbox[3]) / 2],
                                     dtype=np.float32))
        for k in trackers:
            bbox = trackers[k]['kmf'].predict()[0]
            pred_ct = self._add_kmf_att(ret=ret,
                                        bbox=bbox,
                                        trans_input=trans_input,
                                        init=(trackers[k]['age'] <= 0),
                                        draw=(self.opt.kmf_att))
            if pred_ct is None:
                trackers[k]['ct'] = trackers[k]['cts_history'][-1]
            else:
                trackers[k]['ct'] = pred_ct

        return trackers
Esempio n. 3
0
def re_anns(anns, trans_output, output_w, output_h):
    """Map the first polygon of every annotation through ``trans_output``.

    Only ``ann['segmentation'][0]`` is touched; it is read as a flat
    ``[x0, y0, x1, y1, ...]`` list and each consecutive pair is replaced, in
    place, by ``affine_transform(pair, trans_output)``. No clipping to the
    output size is applied.

    Args:
        anns: list of annotation dicts, modified in place.
        trans_output: transform forwarded to ``affine_transform``.
        output_w: unused; kept for signature compatibility.
        output_h: unused; kept for signature compatibility.

    Returns:
        The same ``anns`` object that was passed in.
    """
    for ann in anns:
        polygon = ann['segmentation'][0]
        for start in range(0, len(polygon), 2):
            stop = start + 2
            polygon[start:stop] = affine_transform(polygon[start:stop],
                                                   trans_output)
    return anns
Esempio n. 4
0
    def __getitem__(self, index):
        """Return the network input, joint heatmaps and depth targets.

        Returns:
            dict with keys ``input`` (normalised image after
            ``transpose(2, 0, 1)``), ``target`` (one heatmap per joint),
            ``reg_target`` / ``reg_ind`` / ``reg_mask`` (per-joint regression
            value, flat index and validity flag) and ``meta`` (``index``,
            ``center``, ``scale``, ``gt_3d``, ``pts_crop``).
        """
        opt = self.opt
        n_joints = self.num_joints

        if index < 10 and self.split == 'train':
            # A fresh permutation is drawn whenever one of the first ten
            # training indices is requested.
            self.idxs = np.random.choice(self.num_samples,
                                         self.num_samples,
                                         replace=False)

        img = self._load_image(index)
        gt_3d, pts, c, s = self._get_part_info(index)

        rot = 0
        s = adjust_aspect_ratio(np.array([s, s]), self.aspect_ratio,
                                opt.fit_short_side)

        # NOTE(review): cv2.warpAffine takes dsize as (width, height) but
        # (input_h, input_w) is passed here; harmless for square inputs.
        # Confirm against get_affine_transform's size convention.
        trans_input = get_affine_transform(c, s, rot,
                                           [opt.input_h, opt.input_w])
        inp = cv2.warpAffine(img,
                             trans_input, (opt.input_h, opt.input_w),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(c, s, rot,
                                            [opt.output_h, opt.output_w])
        out = np.zeros((n_joints, opt.output_h, opt.output_w),
                       dtype=np.float32)
        reg_target = np.zeros((n_joints, 1), dtype=np.float32)
        reg_ind = np.zeros((n_joints), dtype=np.int64)
        reg_mask = np.zeros((n_joints), dtype=np.uint8)
        pts_crop = np.zeros((n_joints, 2), dtype=np.int32)

        for j in range(n_joints):
            pt = affine_transform(pts[j, :2], trans_output).astype(np.int32)
            inside = pt[0] >= 0 and pt[1] >= 0 and pt[0] < opt.output_w \
                and pt[1] < opt.output_h
            if not inside:
                continue
            pts_crop[j] = pt
            out[j] = draw_gaussian(out[j], pt, opt.hm_gauss)
            # Third column of pts divided by the first scale component
            # (original note: "assert not fit_short").
            reg_target[j] = pts[j, 2] / s[0]
            # Flat index with the joint id as the fastest-varying axis
            # (original note: "transposed").
            reg_ind[j] = pt[1] * opt.output_w * n_joints + \
                pt[0] * n_joints + j
            reg_mask[j] = 1

        meta = {
            'index': self.idxs[index],
            'center': c,
            'scale': s,
            'gt_3d': gt_3d,
            'pts_crop': pts_crop
        }
        return {
            'input': inp,
            'target': out,
            'meta': meta,
            'reg_target': reg_target,
            'reg_ind': reg_ind,
            'reg_mask': reg_mask
        }
Esempio n. 5
0
    def _get_data_AFE(self, anns, trans_input, trans_output):
        hm_h, hm_w = self.opt.input_h, self.opt.input_w
        down_ratio = self.opt.down_ratio
        trans = trans_input

        bboxes, track_ids = [], []
        for ann in anns:
            cls_id = int(self.cat_ids[ann["category_id"]])
            if (cls_id > self.opt.num_classes or cls_id <= -99
                    or ("iscrowd" in ann and ann["iscrowd"] > 0)):
                continue
            bbox = self._coco_box_to_bbox(ann["bbox"])
            bbox[:2] = affine_transform(bbox[:2], trans)
            bbox[2:] = affine_transform(bbox[2:], trans)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:

                bboxes.append([bbox[0], bbox[1], bbox[2], bbox[3]].copy())

                track_ids.append(ann["track_id"] if "track_id" in ann else -1)

        return bboxes, track_ids
Esempio n. 6
0
    def __getitem__(self, index):
        """Return the augmented network input and per-joint heatmaps.

        During training the scale is jittered, a rotation is drawn with
        probability 0.6, and the sample is mirrored horizontally with
        probability ``opt.flip``.

        A joint counts as annotated when its x or y coordinate is positive in
        the image as loaded. That flag is recorded BEFORE the horizontal flip:
        the flip rewrites x to ``W - 1 - x``, which would otherwise turn every
        un-annotated joint (x <= 0) into an apparently valid one and draw a
        spurious Gaussian for it.

        Returns:
            dict with keys ``input`` (normalised image after
            ``transpose(2, 0, 1)``), ``target`` (one heatmap per joint) and
            ``meta`` (``index``, ``center``, ``scale``, ``pts_crop``).
        """
        img = self._load_image(index)
        _, pts, c, s = self._get_part_info(index)
        r = 0

        if self.split == 'train':
            sf = self.opt.scale
            rf = self.opt.rotate
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if np.random.random() <= 0.6 else 0
        s = min(s, max(img.shape[0], img.shape[1])) * 1.0
        s = np.array([s, s])
        s = adjust_aspect_ratio(s, self.aspect_ratio, self.opt.fit_short_side)

        # Record which joints are annotated while coordinates are still in
        # the un-flipped frame.
        annotated = (pts[:, 0] > 0) | (pts[:, 1] > 0)

        flipped = (
            self.split == 'train' and np.random.random() < self.opt.flip)
        if flipped:
            img = img[:, ::-1, :]
            c[0] = img.shape[1] - 1 - c[0]
            pts[:, 0] = img.shape[1] - 1 - pts[:, 0]
            # Mirroring exchanges left/right joints; the annotation flags
            # must follow the same permutation.
            for e in self.shuffle_ref:
                pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
                annotated[e[0]], annotated[e[1]] = \
                    annotated[e[1]], annotated[e[0]]

        # NOTE(review): cv2.warpAffine takes dsize as (width, height) but
        # (input_h, input_w) is passed here; harmless for square inputs.
        # Confirm against get_affine_transform's size convention.
        trans_input = get_affine_transform(
            c, s, r, [self.opt.input_h, self.opt.input_w])
        inp = cv2.warpAffine(
            img,
            trans_input,
            (self.opt.input_h,
             self.opt.input_w),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 256. - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        trans_output = get_affine_transform(
            c, s, r, [self.opt.output_h, self.opt.output_w])
        out = np.zeros((self.num_joints, self.opt.output_h, self.opt.output_w),
                       dtype=np.float32)
        pts_crop = np.zeros((self.num_joints, 2), dtype=np.int32)
        for i in range(self.num_joints):
            if annotated[i]:
                pts_crop[i] = affine_transform(pts[i], trans_output)
                out[i] = draw_gaussian(out[i], pts_crop[i], self.opt.hm_gauss)

        meta = {'index': index, 'center': c, 'scale': s,
                'pts_crop': pts_crop}
        return {'input': inp, 'target': out, 'meta': meta}
Esempio n. 7
0
    def _add_hps(self, ret, k, ann, gt_det, trans_output, ct_int, bbox, h, w):
        """Fill the keypoint targets of object slot ``k``.

        Keypoints come from ``ann["keypoints"]`` reshaped to
        ``(num_joints, 3)`` (x, y, flag); an all-zero array is used when the
        annotation has none. Every joint is mapped through ``trans_output``
        and then handled by its flag:

        * flag ``<= 0``: coordinates are zeroed and the object's box region
          of that joint's heatmap is passed to ``self._ignore_region``.
        * flag ``> 0`` but outside the output map: coordinates are zeroed.
        * flag ``> 0`` and inside: offset, index, mask and heatmap targets
          are written. When the flag equals 1 the peak pixel is overwritten
          with ``self.ignore_val`` and the heatmap/offset masks are cleared.

        The flattened (x, y) pairs are appended to ``gt_det["hps"]``.
        """
        num_joints = self.num_joints
        if "keypoints" in ann:
            pts = np.array(ann["keypoints"],
                           np.float32).reshape(num_joints, 3)
        else:
            pts = np.zeros((self.num_joints, 3), np.float32)

        if self.opt.simple_radius > 0:
            hp_radius = int(
                simple_radius(h, w, min_overlap=self.opt.simple_radius))
        else:
            hp_radius = max(
                0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))

        out_w, out_h = self.opt.output_w, self.opt.output_h
        for j in range(num_joints):
            pts[j, :2] = affine_transform(pts[j, :2], trans_output)
            slot = k * num_joints + j
            pair = slice(j * 2, j * 2 + 2)

            if not pts[j, 2] > 0:
                pts[j, :2] *= 0
                self._ignore_region(ret["hm_hp"][j,
                                                 int(bbox[1]):int(bbox[3]) + 1,
                                                 int(bbox[0]):int(bbox[2]) + 1])
                continue

            in_view = (pts[j, 0] >= 0 and pts[j, 0] < out_w
                       and pts[j, 1] >= 0 and pts[j, 1] < out_h)
            if not in_view:
                pts[j, :2] *= 0
                continue

            ret["hps"][k, pair] = pts[j, :2] - ct_int
            ret["hps_mask"][k, pair] = 1
            pt_int = pts[j, :2].astype(np.int32)
            ret["hp_offset"][slot] = pts[j, :2] - pt_int
            ret["hp_ind"][slot] = pt_int[1] * out_w + pt_int[0]
            ret["hp_offset_mask"][slot] = 1
            ret["hm_hp_mask"][slot] = 1
            ret["joint"][slot] = j
            draw_umich_gaussian(ret["hm_hp"][j], pt_int, hp_radius)
            if pts[j, 2] == 1:
                ret["hm_hp"][j, pt_int[1], pt_int[0]] = self.ignore_val
                ret["hp_offset_mask"][slot] = 0
                ret["hm_hp_mask"][slot] = 0

        gt_det["hps"].append(pts[:, :2].reshape(num_joints * 2))
Esempio n. 8
0
    def pre_process(self, image, scale, meta=None, anns=None):
        """Resize, warp and normalise ``image`` into a network input batch.

        Args:
            image: H x W x 3 array.
            scale: factor applied to the image height and width before
                warping.
            meta: ignored; a new meta dict is always built and returned.
            anns: optional list of annotation dicts. When given, the first
                polygon of each ``ann['segmentation']`` is mapped through the
                input transform in place.

        Returns:
            ``(images, meta)``: ``images`` is a torch tensor of shape
            ``(1, 3, H, W)``, or ``(2, 3, H, W)`` with a horizontally mirrored
            copy when ``opt.flip_test`` is set. ``meta`` holds ``c``, ``s``,
            ``out_height`` and ``out_width``.
        """
        opt = self.opt
        src_h, src_w = image.shape[0:2]
        scaled_h = int(src_h * scale)
        scaled_w = int(src_w * scale)

        # Images with a side above 1024 px always take the fixed-size path.
        oversized = src_h > 1024 or src_w > 1024
        if opt.fix_res or oversized:
            net_h, net_w = opt.input_h, opt.input_w
            c = np.array([scaled_w / 2., scaled_h / 2.], dtype=np.float32)
            s = max(src_h, src_w) * 1.0
        else:
            # Round each side up to the next multiple of (pad + 1); the
            # original comment says this keeps it divisible by 32, which
            # holds when opt.pad == 31.
            net_h = (scaled_h | opt.pad) + 1
            net_w = (scaled_w | opt.pad) + 1
            c = np.array([scaled_w // 2, scaled_h // 2], dtype=np.float32)
            s = np.array([net_w, net_h], dtype=np.float32)

        trans_input = get_affine_transform(c, s, 0, [net_w, net_h])
        resized = cv2.resize(image, (scaled_w, scaled_h))
        warped = cv2.warpAffine(
            resized, trans_input, (net_w, net_h),
            flags=cv2.INTER_LINEAR)

        if anns is not None:
            for ann in anns:
                polygon = ann['segmentation'][0]
                for start in range(0, len(polygon), 2):
                    stop = start + 2
                    polygon[start:stop] = affine_transform(
                        polygon[start:stop], trans_input)

        normalised = ((warped / 255. - self.mean) / self.std).astype(
            np.float32)
        images = normalised.transpose(2, 0, 1).reshape(1, 3, net_h, net_w)
        if opt.flip_test:
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)

        out_meta = {'c': c, 's': s,
                    'out_height': net_h // opt.down_ratio,
                    'out_width': net_w // opt.down_ratio}
        return images, out_meta
Esempio n. 9
0
    def __getitem__(self, index, debug=False):
        """Build one template/search training pair with keypoint targets.

        A template and a search sample are drawn (a positive pair, or two
        unrelated targets when a negative is sampled), augmented, and turned
        into anchor targets plus keypoint heatmap/offset targets.

        Args:
            index: position in ``self.pick``.
            debug: when true, annotated images are written under ``debug/``.

        Returns:
            Tuple ``(template, search, cls, delta, delta_weight, bbox_reg,
            kp_weight, ret, joints_3d_out)``. ``template`` and ``search`` are
            float32 arrays after a ``(2, 0, 1)`` transpose; ``ret`` is a dict
            with keys ``hps``, ``hm_hp``, ``hp_mask``, ``hp_offset``,
            ``hp_ind``, ``hps_mask`` and ``ind``.

        NOTE(review): ``joints_3d``, ``bbox_reg``, ``kp_weight`` and ``ret``
        are only assigned when ``dataset.has_mask`` is false, so a dataset
        with masks reaches the final lines with those names undefined and
        raises. ``search_mask`` is computed for that case but never used.
        """
        index = self.pick[index]
        dataset, index = self.find_dataset(index)

        gray = self.gray and self.gray > random.random()
        neg = self.neg and self.neg > random.random()

        if neg:
            template = dataset.get_random_target(index)
            if self.inner_neg and self.inner_neg > random.random():
                search = dataset.get_random_target()
            else:
                search = random.choice(self.all_data).get_random_target()
        else:
            template, search = dataset.get_positive_pair(index)

        def center_crop(img, size):
            # Square crop around the centre; side length taken from shape[1].
            shape = img.shape[1]
            if shape == size: return img
            c = shape // 2
            l = c - size // 2
            r = c + size // 2 + 1
            return img[l:r, l:r]

        template_image, scale_z = self.imread(template[0])

        if self.template_small:
            template_image = center_crop(template_image, self.template_size)

        search_image, scale_x = self.imread(search[0])

        if dataset.has_mask:
            if not neg:
                search_mask = (cv2.imread(search[2], 0) > 0).astype(np.float32)
            else:
                search_mask = np.zeros(search_image.shape[:2],
                                       dtype=np.float32)
        else:
            if not neg:
                search_kp = np.array(search[2], dtype=np.float32)
            else:
                search_kp = np.zeros(51, dtype=np.float32)

        if self.crop_size > 0:
            search_image = center_crop(search_image, self.crop_size)

        def toBBox(image, shape):
            # Box centred in the image, with the target size rescaled so the
            # context-padded target would span the exemplar size.
            imh, imw = image.shape[:2]
            if len(shape) == 4:
                w, h = shape[2] - shape[0], shape[3] - shape[1]
            else:
                w, h = shape
            context_amount = 0.5
            exemplar_size = self.template_size  # 127
            wc_z = w + context_amount * (w + h)
            hc_z = h + context_amount * (w + h)
            s_z = np.sqrt(wc_z * hc_z)
            scale_z = exemplar_size / s_z
            w = w * scale_z
            h = h * scale_z
            cx, cy = imw // 2, imh // 2
            bbox = center2corner(Center(cx, cy, w, h))
            return bbox

        template_box = toBBox(template_image, template[1])
        search_box = toBBox(search_image, search[1])
        template, _, _ = self.template_aug(template_image,
                                           template_box,
                                           self.template_size,
                                           gray=gray)
        search, bbox, mask = self.search_aug(search_image,
                                             search_box,
                                             self.search_size,
                                             gray=gray)

        def draw(image, box, name):
            # Debug helper: write a copy of the image with the box outlined.
            image = image.copy()
            x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
            cv2.imwrite(name, image)

        def crop_hwc(bbox, out_sz=255):
            # 2x3 affine matrix mapping ``bbox`` onto an out_sz x out_sz crop.
            a = (out_sz - 1) / (bbox[2] - bbox[0])
            b = (out_sz - 1) / (bbox[3] - bbox[1])
            c = -a * bbox[0]
            d = -b * bbox[1]
            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            mapping = np.array([[a, 0, c], [0, b, d]]).astype(np.float64)
            return mapping

        def crop_hwc1(image, bbox, out_sz, padding=(0, 0, 0)):
            # Same mapping as crop_hwc, applied to the image. Not called in
            # this method; ``padding`` is unused.
            a = (out_sz - 1) / (bbox[2] - bbox[0])
            b = (out_sz - 1) / (bbox[3] - bbox[1])
            c = -a * bbox[0]
            d = -b * bbox[1]
            mapping = np.array([[a, 0, c], [0, b, d]]).astype(np.float64)
            crop = cv2.warpAffine(image, mapping, (out_sz, out_sz))
            return crop

        def pos_s_2_bbox(pos, s):
            # Square box of side ``s`` centred on ``pos``.
            bbox = [
                pos[0] - s / 2, pos[1] - s / 2, pos[0] + s / 2, pos[1] + s / 2
            ]
            return bbox

        def crop_like_SiamFCx(bbox,
                              exemplar_size=127,
                              context_amount=0.5,
                              search_size=255):
            # Target centre and the side length of the search region.
            target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.]
            target_size = [bbox[2] - bbox[0] + 1, bbox[3] - bbox[1] + 1]
            wc_z = target_size[1] + context_amount * sum(target_size)
            hc_z = target_size[0] + context_amount * sum(target_size)
            s_z = np.sqrt(wc_z * hc_z)
            scale_z = exemplar_size / s_z
            d_search = (search_size - exemplar_size) / 2
            pad = d_search / scale_z
            s_x = s_z + 2 * pad

            return target_pos, s_x

        def kp_conversion(KeyPoints, matrix):
            # Apply the 2x3 ``matrix`` to every (x, y, flag) triple whose
            # flag is non-zero; other triples become (0, 0, 0).

            key_points = []
            kps_conversion = []
            skeleton = [0, 0]
            Skeleton = []

            for i in range(0, int(len(KeyPoints) / 3)):
                skeleton[0] = KeyPoints[i * 3 + 0]
                skeleton[1] = KeyPoints[i * 3 + 1]
                Skeleton.append(skeleton[:])
                lis = Skeleton[i]
                lis.append(1)
                key_points.append(lis)

            key_points = np.array(key_points)

            for i in range(0, int(len(KeyPoints) / 3)):
                if KeyPoints[i * 3 + 2] != 0:
                    ky_conversion = np.matmul(matrix,
                                              key_points[i, :]).tolist()
                    kps_conversion.append(ky_conversion[0])
                    kps_conversion.append(ky_conversion[1])
                    kps_conversion.append(KeyPoints[i * 3 + 2])
                else:
                    kps_conversion.append(0)
                    kps_conversion.append(0)
                    kps_conversion.append(0)

            return kps_conversion

        if debug:
            draw(template_image, template_box,
                 "debug/{:06d}_ot.jpg".format(index))
            draw(search_image, search_box, "debug/{:06d}_os.jpg".format(index))
            draw(template, _, "debug/{:06d}_t.jpg".format(index))
            draw(search, bbox, "debug/{:06d}_s.jpg".format(index))

        cls, delta, delta_weight = self.anchor_target(self.anchors, bbox,
                                                      self.size, neg)
        if not dataset.has_mask:
            pos, s = crop_like_SiamFCx(search_box,
                                       exemplar_size=127,
                                       context_amount=0.5,
                                       search_size=255)
            mapping_bbox = pos_s_2_bbox(pos, s)

            mapping = crop_hwc(mapping_bbox, out_sz=255)

            keypoints = kp_conversion(search_kp.tolist(), mapping)

            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float64)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float64)
            for ipt in range(self.num_joints):
                joints_3d[ipt, 0] = keypoints[ipt * 3 + 0]
                joints_3d[ipt, 1] = keypoints[ipt * 3 + 1]
                joints_3d[ipt, 2] = keypoints[ipt * 3 + 2]
                t_vis = search_kp[ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            img = search.copy()

            if not neg:
                kp_weight = cls.max(axis=0, keepdims=True)
            else:
                kp_weight = np.zeros([1, cls.shape[1], cls.shape[2]],
                                     dtype=np.float32)

            # Centre-point targets on the output_res x output_res grid.
            c = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                         dtype=np.float32)
            s = max(img.shape[0], img.shape[1]) * 1.0
            rot = 0

            output_res = self.output_res
            num_joints = self.num_joints
            trans_output_rot = get_affine_transform(c, s, rot,
                                                    [output_res, output_res])
            trans_output = get_affine_transform(c, s, 0,
                                                [output_res, output_res])

            ind = np.zeros(1, dtype=np.int64)
            hm_hp = np.zeros((num_joints, output_res, output_res),
                             dtype=np.float32)
            kps = np.zeros(num_joints * 2, dtype=np.float32)
            kps_mask = np.zeros((self.num_joints * 2), dtype=np.uint8)
            hp_offset = np.zeros((num_joints, 2), dtype=np.float32)
            hp_ind = np.zeros(num_joints, dtype=np.int64)
            hp_mask = np.zeros(num_joints, dtype=np.int64)

            draw_gaussian = draw_msra_gaussian if self.mse_loss else \
                        draw_umich_gaussian

            pts = joints_3d.copy()
            bbox = np.array(bbox, np.float32)
            # Untransformed copy of the search box, returned to the caller.
            bbox_reg = np.array(bbox, np.float32)
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # NOTE(review): ct_int is only assigned for a non-degenerate box;
            # with h <= 0 or w <= 0 the uses below raise because the name is
            # unbound.
            if (h > 0 and w > 0):
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
            hp_radius = gaussian_radius(
                (math.ceil(h) * 2.3, math.ceil(w) * 2.3))
            hp_radius = self.hm_gauss \
                        if self.mse_loss else max(0, int(hp_radius))
            ind[0] = ct_int[1] * output_res + ct_int[0]
            for j in range(num_joints):
                if pts[j, 2] > 0:
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
                    if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                       pts[j, 1] >= 0 and pts[j, 1] < output_res:
                        kps[j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                        kps_mask[j * 2:j * 2 + 2] = 1
                        pt_int = pts[j, :2].astype(np.int32)
                        hp_offset[j] = pts[j, :2] - pt_int
                        hp_ind[j] = pt_int[1] * output_res + pt_int[0]
                        hp_mask[j] = 1

                        draw_gaussian(hm_hp[j], pt_int, hp_radius)

            ret = {'hps': kps, 'hm_hp': hm_hp, 'hp_mask': hp_mask}
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hps_mask': kps_mask,
                'ind': ind
            })

        joints_3d_out = joints_3d.transpose(1, 0)

        template, search = map(
            lambda x: np.transpose(x, (2, 0, 1)).astype(np.float32),
            [template, search])
        return template, search, cls, delta, \
          delta_weight, bbox_reg, \
          np.array(kp_weight, np.float32), ret, joints_3d_out
Esempio n. 10
0
    def _add_instance(
        self,
        ret,
        gt_det,
        k,
        cls_id,
        bbox,
        bbox_amodal,
        ann,
        trans_output,
        aug_s,
        calib,
        pre_cts=None,
        track_ids=None,
    ):
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        ret["cat"][k] = cls_id - 1
        ret["mask"][k] = 1
        if "wh" in ret:
            ret["wh"][k] = 1.0 * w, 1.0 * h
            ret["wh_mask"][k] = 1
        ret["ind"][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        ret["reg"][k] = ct - ct_int
        ret["reg_mask"][k] = 1
        draw_umich_gaussian(ret["hm"][cls_id - 1], ct_int, radius)

        gt_det["bboxes"].append(
            np.array(
                [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
                dtype=np.float32,
            ))
        gt_det["scores"].append(1)
        gt_det["clses"].append(cls_id - 1)
        gt_det["cts"].append(ct)

        if "tracking" in self.opt.heads:

            if ann["track_id"] in track_ids:
                pre_ct = pre_cts[track_ids.index(ann["track_id"])]
                ret["tracking_mask"][k] = 1
                ret["tracking"][k] = 0 * (pre_ct - ct_int)

                gt_det["tracking"].append(ret["tracking"][k])
            else:
                gt_det["tracking"].append(np.zeros(2, np.float32))

        if "ltrb" in self.opt.heads:
            ret["ltrb"][k] = (
                bbox[0] - ct_int[0],
                bbox[1] - ct_int[1],
                bbox[2] - ct_int[0],
                bbox[3] - ct_int[1],
            )
            ret["ltrb_mask"][k] = 1

        if "ltrb_amodal" in self.opt.heads:
            ret["ltrb_amodal"][k] = (
                bbox_amodal[0] - ct_int[0],
                bbox_amodal[1] - ct_int[1],
                bbox_amodal[2] - ct_int[0],
                bbox_amodal[3] - ct_int[1],
            )
            ret["ltrb_amodal_mask"][k] = 1
            gt_det["ltrb_amodal"].append(bbox_amodal)

        if "nuscenes_att" in self.opt.heads:
            if ("attributes" in ann) and ann["attributes"] > 0:
                att = int(ann["attributes"] - 1)
                ret["nuscenes_att"][k][att] = 1
                ret["nuscenes_att_mask"][k][self.nuscenes_att_range[att]] = 1
            gt_det["nuscenes_att"].append(ret["nuscenes_att"][k])

        if "velocity" in self.opt.heads:
            if ("velocity" in ann) and min(ann["velocity"]) > -1000:
                ret["velocity"][k] = np.array(ann["velocity"], np.float32)[:3]
                ret["velocity_mask"][k] = 1
            gt_det["velocity"].append(ret["velocity"][k])

        if "hps" in self.opt.heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h,
                          w)

        if "rot" in self.opt.heads:
            self._add_rot(ret, ann, k, gt_det)

        if "dep" in self.opt.heads:
            if "depth" in ann:
                ret["dep_mask"][k] = 1
                ret["dep"][k] = ann["depth"] * aug_s
                gt_det["dep"].append(ret["dep"][k])
            else:
                gt_det["dep"].append(2)

        if "dim" in self.opt.heads:
            if "dim" in ann:
                ret["dim_mask"][k] = 1
                ret["dim"][k] = ann["dim"]
                gt_det["dim"].append(ret["dim"][k])
            else:
                gt_det["dim"].append([1, 1, 1])

        if "amodel_offset" in self.opt.heads:
            if "amodel_center" in ann:
                amodel_center = affine_transform(ann["amodel_center"],
                                                 trans_output)
                ret["amodel_offset_mask"][k] = 1
                ret["amodel_offset"][k] = amodel_center - ct_int
                gt_det["amodel_offset"].append(ret["amodel_offset"][k])
            else:
                gt_det["amodel_offset"].append([0, 0])
Esempio n. 11
0
    def __getitem__(self, index):
        """Load one sample and build its detection training targets.

        The image is read, augmented when ``self.split == 'train'`` (random
        center or scale/shift jitter, horizontal flip, optional color
        jitter), warped to the network input size and normalized.  Every
        annotation is then projected onto the down-sampled output map.  The
        heatmap peak is drawn at the center returned by
        ``self._get_polygon_center`` for the annotation's polygon, while
        ``reg`` stores the offset from that integer peak to the float box
        center.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask``, ``ind`` and one of
            ``wh`` / ``dense_wh`` (+ ``dense_wh_mask``) / ``cat_spec_wh``
            (+ ``cat_spec_mask``); ``reg`` is added when ``opt.reg_offset``
            is set and ``meta`` when debugging or outside the train split.

        Raises:
            OSError: If the image file cannot be read.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        # All annotations of this image, truncated to max_objs below.
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError('Failed to read image: {}'.format(img_path))

        height, width = img.shape[0], img.shape[1]
        # Center of the original image as (x, y).
        c = np.array([width / 2., height / 2.], dtype=np.float32)

        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            # Per-axis scale (width, height) instead of a single max-side
            # scalar; the network input size comes from the options.
            s = np.array([width, height], dtype=np.float32)
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random center restricted to +/- 1/8 of the image size
                # around the true center so the image is not shifted too far.
                w_range, h_range = width // 8, height // 8
                c[0] = np.random.randint(low=width // 2 - w_range,
                                         high=width // 2 + w_range)
                c[1] = np.random.randint(low=height // 2 - h_range,
                                         high=height // 2 + h_range)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                # s is a 2-element array here, so each axis must use its own
                # component; multiplying by the whole array cannot be stored
                # into a single element of c.
                c[0] += s[0] * np.clip(np.random.randn() * cf,
                                       -2 * cf, 2 * cf)
                c[1] += s[1] * np.clip(np.random.randn() * cf,
                                       -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # Random horizontal flip.
            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # Warp the original image to the network input size.
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)

        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        # Normalize and convert HWC -> CHW.
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes

        # Maps original-image coordinates onto the output map.
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # sparse
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss \
            else draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # xywh -> x1y1x2y2
            bbox = self._coco_box_to_bbox(ann['bbox'])
            # NOTE(review): only the first entry of 'segmentation' is used
            # and it is read as flat x,y pairs -- confirm annotation format.
            segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                segmentation[:, 0] = width - segmentation[:, 0] - 1

            # Project both box corners onto the output map and clip.
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Only the polygon center is needed, so transform just that.
            # NOTE(review): unlike bbox it is not clipped to the output map;
            # confirm it cannot fall outside and produce an invalid `ind`.
            polygon_center = self._get_polygon_center(segmentation)
            polygon_center = affine_transform(polygon_center, trans_output)

            if h > 0 and w > 0:
                radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
                # One third of the usual radius.
                radius = max(0, int(math.ceil(radius / 3)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius

                box_center = np.array([(bbox[0] + bbox[2]) / 2,
                                       (bbox[1] + bbox[3]) / 2],
                                      dtype=np.float32)

                # The heatmap peak sits at the polygon center.
                ct = polygon_center
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)

                wh[k] = 1. * w, 1. * h
                # Flattened index of the peak on the output map.
                ind[k] = ct_int[1] * output_w + ct_int[0]
                # Offset: float box center minus integer polygon center.
                reg[k] = box_center - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)

                # Ground-truth box is rebuilt around the integer box center.
                ct = box_center.astype(np.int32)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh,
                        'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Esempio n. 12
0
    def __getitem__(self, index):
        """Load one sample and build object and interaction targets.

        Besides the per-object heatmap / size / offset targets, every
        human-object interaction in ``hoi_annotation`` gets a point on
        ``hm_rel`` at the midpoint between the subject and object centers,
        plus the offsets from that midpoint to each of the two centers.

        Args:
            index: Position of the sample in ``self.ids``.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
            ``hm_rel``, ``sub_offset``, ``obj_offset``, ``offset_mask`` and
            ``rel_ind``; ``reg`` is added when ``opt.reg_offset`` is set.

        Raises:
            OSError: If the image file cannot be read.
        """
        img_id = self.ids[index]

        record = self.hoi_annotations[img_id]
        file_name = record['file_name']
        img_path = os.path.join(self.root, self.image_dir, file_name)
        anns = record['annotations']
        hoi_anns = record['hoi_annotation']
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError('Failed to read image: {}'.format(img_path))

        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(height, width) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
                w_border = self._get_border(128, width)
                h_border = self._get_border(128, height)
                c[0] = np.random.randint(low=w_border,
                                         high=width - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=height - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                # s is a scalar normally but a 2-element array under
                # keep_res; view it per-axis so the shift can be stored into
                # a single element of c in both cases.
                s_xy = np.broadcast_to(s, (2,))
                c[0] += s_xy[0] * np.clip(np.random.randn() * cf,
                                          -2 * cf, 2 * cf)
                c[1] += s_xy[1] * np.clip(np.random.randn() * cf,
                                          -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        hm_rel = np.zeros((self.num_classes_verb, output_h, output_w),
                          dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
        obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        # Integer center of every retained object, indexed like `anns`.
        bbox_ct = []
        num_rels = min(len(hoi_anns), self.max_rels)
        for k in range(num_objs):
            ann = anns[k]
            # Float copy: an integer array would truncate the transformed
            # coordinates, and a view would overwrite the stored annotation.
            bbox = np.array(ann['bbox'], dtype=np.float32)
            if isinstance(ann['category_id'], str):
                # Normalized in place so the conversion happens only once.
                ann['category_id'] = int(ann['category_id'].replace('\n', ''))
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)

            ct_int = ct.astype(np.int32)
            # Recorded even for degenerate boxes so indices stay aligned.
            bbox_ct.append(ct_int.tolist())
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                draw_gaussian(hm[cls_id], ct_int, radius)

        offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
        rel_ind = np.zeros((self.max_rels), dtype=np.int64)
        for k in range(num_rels):
            hoi = hoi_anns[k]
            if isinstance(hoi['category_id'], str):
                hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
            hoi_cate = int(self.cat_ids_verb[hoi['category_id']])
            sub_id, obj_id = hoi['subject_id'], hoi['object_id']
            if max(sub_id, obj_id) >= len(bbox_ct):
                # The relation points at an object dropped by the max_objs
                # cap; leave its slot masked out instead of failing.
                continue
            sub_ct = bbox_ct[sub_id]
            obj_ct = bbox_ct[obj_id]
            offset_mask[k] = 1
            # Interaction point: midpoint between subject and object centers.
            rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                               (sub_ct[1] + obj_ct[1]) / 2],
                              dtype=np.float32)
            radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                      math.ceil(abs(sub_ct[1] - obj_ct[1]))))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            rel_ct_int = rel_ct.astype(np.int32)
            draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)
            sub_offset[k] = (rel_ct_int[0] - sub_ct[0],
                             rel_ct_int[1] - sub_ct[1])
            obj_offset[k] = (rel_ct_int[0] - obj_ct[0],
                             rel_ct_int[1] - obj_ct[1])
            rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hm_rel': hm_rel,
            'sub_offset': sub_offset,
            'obj_offset': obj_offset,
            'offset_mask': offset_mask,
            'rel_ind': rel_ind
        }
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        return ret
Esempio n. 13
0
    def __getitem__(self, index):
        """Load one sample and build heatmap, size and depth targets.

        The image is warped to ``opt.input_w`` x ``opt.input_h`` (with
        random scale/shift jitter on the train split with probability
        ``opt.aug_ddd``) and each annotation is projected onto the
        ``opt.output_w`` x ``opt.output_h`` map.  Annotations with a negative
        class id are painted on the heatmap as ignore regions only.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with ``input``, ``hm``, ``dep``, ``wh``, ``ind`` and
            ``reg_mask``; ``meta`` is added when debugging or when the split
            name does not contain ``'train'``.

        Raises:
            OSError: If the image file cannot be read.
        """
        # TODO: calibration handling may still need to be adapted.
        img_id = self.images[index]
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None, not by raising.
            raise OSError('Failed to read image: {}'.format(img_path))
        # Read but currently unused: 'calib' is left out of `meta` below.
        if 'calib' in img_info:
            calib = np.array(img_info['calib'], dtype=np.float32)
        else:
            calib = self.calib

        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.])
        if self.opt.keep_res:
            s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
        else:
            s = np.array([width, height], dtype=np.int32)

        aug = False
        if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
            aug = True
            sf = self.opt.scale
            cf = self.opt.shift
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            c[0] += width * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += height * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)

        trans_input = get_affine_transform(
            c, s, 0, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        num_classes = self.opt.num_classes
        trans_output = get_affine_transform(
            c, s, 0, [self.opt.output_w, self.opt.output_h])

        hm = np.zeros((num_classes, self.opt.output_h, self.opt.output_w),
                      dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dep = np.zeros((self.max_objs, 1), dtype=np.float32)
        # No `dim` target: it is not an output of the ctdet head.
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian
        # Never filled in this variant; `meta` falls back to zeros.
        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id <= -99:
                continue
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((h, w))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if cls_id < 0:
                    # -1 ignores every class; otherwise class -cls_id - 2.
                    ignore_id = [_ for _ in range(num_classes)] \
                        if cls_id == - 1 else [- cls_id - 2]
                    if self.opt.rect_mask:
                        hm[ignore_id,
                           int(bbox[1]):int(bbox[3]) + 1,
                           int(bbox[0]):int(bbox[2]) + 1] = 0.9999
                    else:
                        for cc in ignore_id:
                            draw_gaussian(hm[cc], ct, radius)
                        hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
                    continue
                draw_gaussian(hm[cls_id], ct, radius)

                wh[k] = 1. * w, 1. * h
                dep[k] = ann['depth']
                ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
                # Enable regression only for objects that actually received
                # targets above; skipped, ignored or degenerate objects keep
                # a zero mask so index 0 is not regressed toward zeros.
                reg_mask[k] = 1 if not aug else 0
        ret = {
            'input': inp,
            'hm': hm,
            'dep': dep,
            'wh': wh,
            'ind': ind,
            'reg_mask': reg_mask
        }  # `dim` was removed from the targets
        if self.opt.reg_bbox:
            # 'wh' is already present above; kept for parity with the option.
            ret.update({'wh': wh})
        if self.opt.debug > 0 or not ('train' in self.split):
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 18), dtype=np.float32)
            # 'calib' is omitted until calibration is wired in; it is not
            # used by default for COCO.
            meta = {
                'c': c,
                's': s,
                'gt_det': gt_det,
                'image_path': img_path,
                'img_id': img_id
            }
            ret['meta'] = meta

        return ret
Esempio n. 14
0
    def _add_instance(self,
                      ret,
                      gt_det,
                      k,
                      cls_id,
                      bbox,
                      bbox_amodal,
                      ann,
                      trans_output,
                      aug_s,
                      calib,
                      pre_cts=None,
                      track_ids=None,
                      flipped=False):
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        ret['cat'][k] = cls_id - 1
        ret['mask'][k] = 1
        if 'wh' in ret:
            ret['wh'][k] = 1. * w, 1. * h
            ret['wh_mask'][k] = 1
        ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        ret['reg'][k] = ct - ct_int
        ret['reg_mask'][k] = 1
        draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

        gt_det['bboxes'].append(
            np.array(
                [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
                dtype=np.float32))
        gt_det['scores'].append(1)
        gt_det['clses'].append(cls_id - 1)
        gt_det['cts'].append(ct)

        if 'tracking' in self.opt.heads:
            if ann['track_id'] in track_ids:
                pre_ct = pre_cts[track_ids.index(ann['track_id'])]
                ret['tracking_mask'][k] = 1
                ret['tracking'][k] = pre_ct - ct_int
                gt_det['tracking'].append(ret['tracking'][k])
            else:
                gt_det['tracking'].append(np.zeros(2, np.float32))

        if 'ltrb' in self.opt.heads:
            ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
              bbox[2] - ct_int[0], bbox[3] - ct_int[1]
            ret['ltrb_mask'][k] = 1

        if 'ltrb_amodal' in self.opt.heads:
            ret['ltrb_amodal'][k] = \
              bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
              bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
            ret['ltrb_amodal_mask'][k] = 1
            gt_det['ltrb_amodal'].append(bbox_amodal)

        if 'nuscenes_att' in self.opt.heads:
            if ('attributes' in ann) and ann['attributes'] > 0:
                att = int(ann['attributes'] - 1)
                ret['nuscenes_att'][k][att] = 1
                ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
            gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

        if 'velocity' in self.opt.heads:
            if ('velocity' in ann) and min(ann['velocity']) > -1000:
                ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
                ret['velocity_mask'][k] = 1
            gt_det['velocity'].append(ret['velocity'][k])

        if 'hps' in self.opt.heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h,
                          w)

        if 'rot' in self.opt.heads:
            self._add_rot(ret, ann, k, gt_det)

        if 'dep' in self.opt.heads:
            if 'depth' in ann:
                ret['dep_mask'][k] = 1
                ret['dep'][k] = ann['depth'] * aug_s
                gt_det['dep'].append(ret['dep'][k])
            else:
                gt_det['dep'].append(2)

        if 'dim' in self.opt.heads:
            if 'dim' in ann:
                ret['dim_mask'][k] = 1
                ret['dim'][k] = ann['dim']
                gt_det['dim'].append(ret['dim'][k])
            else:
                gt_det['dim'].append([1, 1, 1])

        if 'amodel_offset' in self.opt.heads:
            if 'amodel_center' in ann:
                amodel_center = affine_transform(ann['amodel_center'],
                                                 trans_output)
                ret['amodel_offset_mask'][k] = 1
                ret['amodel_offset'][k] = amodel_center - ct_int
                gt_det['amodel_offset'].append(ret['amodel_offset'][k])
            else:
                gt_det['amodel_offset'].append([0, 0])

        #######track seg
        if 'seg' in self.opt.heads:
            if ann['segmentation'] != None:
                segment = self.coco.annToMask(ann)
            if flipped:
                if ann['segmentation'] != None:
                    segment = segment[:, ::-1]
            if ann['segmentation'] != None:
                segment = cv2.warpAffine(
                    segment,
                    trans_output, (self.opt.output_w, self.opt.output_h),
                    flags=cv2.INTER_LINEAR)
                segment = segment.astype(np.float32)
                segment_mask = np.ones_like(segment)
                pad_rate = 0.1
                x,y = (np.clip([ct[0] - (1 + pad_rate)*w/2 ,ct[0] + (1 + pad_rate)*w/2 ],0,self.opt.output_w - 1)).astype(np.int), \
                      (np.clip([ct[1] - (1 + pad_rate)*h/2 , ct[1] + (1 + pad_rate)*h/2],0,self.opt.output_h - 1)).astype(np.int)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment > 0] = 1
                segment[segment_mask == 1] = 255
                ret['seg'][k] = segment