    def _get_input(self, img, trans_input):
        inp = cv2.warpAffine(img, trans_input,
                             (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)

        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        return inp
    def _get_data(self, position):
        img_id = self.images[self._indexes[position]]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.params.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
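        # keep_res keeps the original resolution, rounding each side up to a
        # multiple of (opt.pad + 1); otherwise the image is scaled to the fixed
        # opt.input_h x opt.input_w with s set to the longer side.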
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                assert (
                    len(img.shape) == 3
                ), f"The dimensions of img should be 3. Filename: {img_path}, shape: {img.shape}"
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1
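        # Affine transform from the augmented crop (center c, scale s) to the
        # network input resolution.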
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._rng, inp, self.params._eig_val,
                      self.params._eig_vec)
        inp = (inp - self.params.mean) / self.params.std

        if self.mixed_precision:
            inp = fast_pad(inp)
        # Transpose to NCHW if channel_last is not enabled
        if not self.channel_last:
            inp = inp.transpose(2, 0, 1)
        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.params.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        ind = np.zeros((self.params.max_objs), dtype=np.int32)
        wh = np.zeros((self.params.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.params.max_objs, 2), dtype=np.float32)
        reg_mask = np.zeros((self.params.max_objs, 1), dtype=np.float32)
        cls = np.zeros((self.params.max_objs, 1), dtype=np.int32)

        draw_gaussian = draw_umich_gaussian
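        # For each annotation: map the box into output-map coordinates, splat a
        # Gaussian on the class heatmap at its center, and record size / offset
        # regression targets at the flattened center index.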
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.params.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
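                # Row-major index of the center cell: y * output_w + x.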
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cls[k] = cls_id
        # Transpose heatmap to NHWC if channel last is enabled
        if self.channel_last:
            hm = np.transpose(hm, (1, 2, 0))
        ret = (inp, hm, ind, wh, reg, reg_mask, cls)
        return ret
Example #3
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError('Failed to read image: ' + img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)
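        # dense_wh stores per-pixel width/height regression targets, while
        # cat_spec_wh / cat_spec_mask hold class-specific size targets used
        # when opt.cat_spec_wh is enabled.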

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
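        # gt_det collects (x1, y1, x2, y2, score, class) rows that are attached
        # to the meta dict for debugging and evaluation.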
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
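                # With MSE loss (draw_msra_gaussian) a fixed radius opt.hm_gauss
                # is used; the focal-loss default keeps the IoU-derived radius.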
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
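        # dense_wh_mask repeats the peak class heatmap over both channels so the
        # dense size loss is weighted toward pixels near object centers.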
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #4
    def __getitem__(self, index):
        # print('--------------->>>> multi pose index',index)
        img_id = self.images[index]
        # print('--------------->>>> multi pose ',img_id)
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
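        # trans_output_rot maps keypoints with the rotation augmentation applied,
        # while trans_output (no rotation) is used for the bounding boxes.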

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
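        # hm_hp: per-joint heatmaps; kps: joint offsets relative to the object
        # center; hp_offset / hp_ind / hp_mask: sub-pixel offsets, flattened
        # indices and validity flags for each individual joint.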

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
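        # When rotation augmentation was applied, the sample becomes an ignore
        # region: the heatmap is filled with 0.9999 and the masks are zeroed.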
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #5
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)

        # """
        HM_ATT = False
        PYFLOW = True
        ONE_CLASS_ONLY = True
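        # Hard-coded switches for the auxiliary segmentation target: PYFLOW
        # loads precomputed background-subtraction masks, otherwise the mask is
        # derived from COCO stuff pixel maps or from the ground-truth boxes.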

        if not HM_ATT:
            if PYFLOW:
                if 'uav' in self.opt.dataset:
                    seg_path = os.path.join(
                        '/store/datasets/UAV/bgsubs',
                        os.path.dirname(file_name).split('/')[-1],
                        os.path.basename(file_name).replace('jpg', 'png'))
                else:
                    seg_path = os.path.join(
                        '/store/datasets/OlderUA-Detrac/pyflow-bgsubs',
                        os.path.dirname(file_name).split('/')[-1],
                        os.path.basename(file_name).replace('jpg', 'png'))
        # """

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        channel_counter = len(self.coco.getCatIds())
        if not HM_ATT:
            bboxes = {}
            for ann in anns:
                if str(ann['category_id']) in bboxes:
                    bboxes[str(ann['category_id'])].append([
                        int(ann['bbox'][0]),
                        int(ann['bbox'][1]),
                        int(ann['bbox'][0] + ann['bbox'][2]),
                        int(ann['bbox'][1] + ann['bbox'][3])
                    ])
                else:
                    bboxes[str(ann['category_id'])] = [[
                        int(ann['bbox'][0]),
                        int(ann['bbox'][1]),
                        int(ann['bbox'][0] + ann['bbox'][2]),
                        int(ann['bbox'][1] + ann['bbox'][3])
                    ]]
        # for ann in anns:
        #    bboxes.append([int(ann['bbox'][0]),
        #                  int(ann['bbox'][1]),
        #                 int(ann['bbox'][0] + ann['bbox'][2]),
        #                int(ann['bbox'][1] + ann['bbox'][3])])
        num_objs = min(len(anns), self.max_objs)
        # print(img_path)
        img = cv2.imread(img_path)
        if not HM_ATT:
            if PYFLOW:
                seg_img = cv2.imread(seg_path, 0)  # hughes

            if not PYFLOW:
                if 'coco' in img_path:
                    if 'val' in img_path:
                        seg_dir = '/store/datasets/coco/annotations/stuff_val2017_pixelmaps'
                    else:
                        seg_dir = '/store/datasets/coco/annotations/stuff_train2017_pixelmaps'
                    stuff_img = cv2.imread(
                        os.path.join(seg_dir,
                                     file_name.replace('.jpg', '.png')))
                    seg_img = np.zeros([img.shape[0], img.shape[1]])
                    seg_img[stuff_img[:, :, 0] == 0] += 1
                    seg_img[stuff_img[:, :, 1] == 214] += 1
                    seg_img[stuff_img[:, :, 2] == 255] += 1
                    seg_img[seg_img == 3] = 255
                    seg_img[seg_img < 255] = 0
                else:
                    if not ONE_CLASS_ONLY:
                        seg_img = np.zeros(
                            [channel_counter, img.shape[0], img.shape[1]])
                        for label in range(1, channel_counter + 1):
                            if str(label) in bboxes:
                                for bbox in bboxes[str(label)]:
                                    seg_img[label - 1, bbox[1]:bbox[3],
                                            bbox[0]:bbox[2]] = 255
                    else:
                        seg_img = np.zeros([img.shape[0], img.shape[1]])
                        for label in range(1, channel_counter + 1):
                            if str(label) in bboxes:
                                for bbox in bboxes[str(label)]:
                                    seg_img[bbox[1]:bbox[3],
                                            bbox[0]:bbox[2]] = 255

        # seg_img = np.zeros([img.shape[0], img.shape[1]])
        # for bbox in bboxes:
        #   seg_img[bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name.replace('.jpg', '_rgb.jpg'))), seg_img_rgb)
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name)), seg_img)
        # exit()
        # print("IMG_SHAPE: ", img.shape, " MEAN: ", np.mean(img))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name)), img)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                if not HM_ATT:
                    if ONE_CLASS_ONLY:
                        seg_img = seg_img[:, ::-1]
                    else:
                        seg_img = seg_img[:, ::-1, :]
                # print('img.shape: ', img.shape)
                # print('seg_img.shape: ', seg_img.shape)
                # exit()
                # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), img)
                # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), seg_img)
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # print('TRANS INPUT SHAPE: ', trans_input.shape)
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        if not HM_ATT:
            if ONE_CLASS_ONLY:
                seg_inp = cv2.warpAffine(seg_img,
                                         trans_input, (input_w, input_h),
                                         flags=cv2.INTER_LINEAR)
            else:
                seg_inp = np.zeros((seg_img.shape[0], input_w, input_h))
                for channel in range(seg_img.shape[0]):
                    seg_inp[channel, :, :] = cv2.warpAffine(
                        seg_img[channel, :, :],
                        trans_input, (input_w, input_h),
                        flags=cv2.INTER_LINEAR)

        inp = (inp.astype(np.float32) / 255.)
        if not HM_ATT:
            seg_inp = (seg_inp.astype(np.float32) / 255.)  # hughes
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), inp)
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), seg_inp)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # print('MEAN: ', np.average(seg_inp))

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        if self.opt.elliptical_gt:
            draw_gaussian = draw_ellipse_gaussian
        else:
            draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
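                # Elliptical ground truth: stretch the Gaussian radius along the
                # box's longer side so the splat follows the aspect ratio.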
                if self.opt.elliptical_gt:
                    radius_x = radius if h > w else int(radius * (w / h))
                    radius_y = radius if w >= h else int(radius * (h / w))
                    # radius_x = radius if w > h else int(radius / (w/h))
                    # radius_y = radius if h >= w else int(radius / (h/w))
                    draw_gaussian(hm[cls_id], ct_int, radius_x, radius_y)
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if not HM_ATT:
            if ONE_CLASS_ONLY:
                # scale_percent = 25  # percent of original size
                # width = int(seg_inp.shape[1] * scale_percent / 100)
                # height = int(seg_inp.shape[0] * scale_percent / 100)
                # dim = (width, height)
                # seg_inp = cv2.resize(seg_inp, dim, interpolation=cv2.INTER_AREA)
                seg_inp = np.expand_dims(seg_inp, 0)
        # print(seg_inp.shape)
        # print(hm.shape)
        # print(inp.shape)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'seg': seg_inp,
            'ct_att': hm
        }  # 'seg': seg_inp}  # 'seg': np.expand_dims(seg_inp, 0)}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta

        # ret['seg'] = ret['hm']
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), (inp.transpose(1, 2, 0)* 255).astype(np.uint8))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), (seg_inp.squeeze(0) * 255).astype(np.uint8))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/test_images/hm/", "hm_" + os.path.basename(file_name)), (hm.squeeze(0) * 255).astype(np.uint8))

        return ret
Example #6
    def __getitem__(self, index):
        img_id = self.ids[index]

        file_name = self.hoi_annotations[img_id]['file_name']
        img_path = os.path.join(self.root, self.image_dir, file_name)
        anns = self.hoi_annotations[img_id]['annotations']
        hoi_anns = self.hoi_annotations[img_id]['hoi_annotation']
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        hm_rel = np.zeros((self.num_classes_verb, output_h, output_w),
                          dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
        obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
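        # hm_rel is the interaction (verb) heatmap drawn at the midpoint of the
        # subject and object centers; sub_offset / obj_offset store the
        # displacements between that midpoint and the two box centers.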

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []

        bbox_ct = []
        num_rels = min(len(hoi_anns), self.max_rels)
        for k in range(num_objs):
            ann = anns[k]
            bbox = np.asarray(ann['bbox'])
            if isinstance(ann['category_id'], str):
                ann['category_id'] = int(ann['category_id'].replace('\n', ''))
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)

            ct_int = ct.astype(np.int32)
            bbox_ct.append(ct_int.tolist())
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                draw_gaussian(hm[cls_id], ct_int, radius)

                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
        rel_ind = np.zeros((self.max_rels), dtype=np.int64)
        for k in range(num_rels):
            hoi = hoi_anns[k]
            if isinstance(hoi['category_id'], str):
                hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
            hoi_cate = int(self.cat_ids_verb[hoi['category_id']])
            sub_ct = bbox_ct[hoi['subject_id']]
            obj_ct = bbox_ct[hoi['object_id']]
            offset_mask[k] = 1
            rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                               (sub_ct[1] + obj_ct[1]) / 2],
                              dtype=np.float32)
            radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                      math.ceil(abs(sub_ct[1] - obj_ct[1]))))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            rel_ct_int = rel_ct.astype(np.int32)
            draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)
            rel_sub_offset = np.array(
                [rel_ct_int[0] - sub_ct[0], rel_ct_int[1] - sub_ct[1]],
                dtype=np.float32)
            rel_obj_offset = np.array(
                [rel_ct_int[0] - obj_ct[0], rel_ct_int[1] - obj_ct[1]],
                dtype=np.float32)
            sub_offset[k] = 1. * rel_sub_offset[0], 1. * rel_sub_offset[1]
            obj_offset[k] = 1. * rel_obj_offset[0], 1. * rel_obj_offset[1]
            rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hm_rel': hm_rel,
            'sub_offset': sub_offset,
            'obj_offset': obj_offset,
            'offset_mask': offset_mask,
            'rel_ind': rel_ind
        }
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        return ret
Example #7
    def __getitem__(self, index):
        mosaic_pro = random.random()
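        # random.random() is always >= 0, so the mosaic branch is effectively
        # always taken; the "> 2" thresholds below (positive_aug, gray_pro,
        # iaa_pro) keep the chartlet, grayscale and imgaug branches disabled.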
        if mosaic_pro > 0:
            img_id = self.images[index]
            img, labels = self.load_mosaic(index)
            all_ann = []
            for da_label in labels:
                da_label = da_label.tolist()
                for da_l in da_label:
                    all_ann.append(da_l)
            num_objs = min(len(all_ann), self.max_objs)
        else:
            positive_aug = random.random()
            if positive_aug > 2:
                index1 = random.randint(0, self.num_samples - 1)
                # chartlet_dir = "/home/raid5/daming/HandDataMix/TrainImg/AnnImgMix"
                img_id = self.images[index]
                img_id1 = self.images[index1]

                file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
                file_name1 = self.coco.loadImgs(ids=[img_id1])[0]['file_name']

                path_num = random.random()
                img_path = os.path.join(self.img_dir, file_name)
                img_path1 = os.path.join(self.img_dir, file_name1)
                # if path_num > 0.5:
                #   img_path = os.path.join(chartlet_dir, file_name)

                ann_ids = self.coco.getAnnIds(imgIds=[img_id])
                ann_ids1 = self.coco.getAnnIds(imgIds=[img_id1])

                anns = self.coco.loadAnns(ids=ann_ids)
                anns1 = self.coco.loadAnns(ids=ann_ids1)

                img = cv2.imread(img_path)
                img1 = cv2.imread(img_path1)
                hand_num = len(anns1)
                if hand_num > 0:
                    for ann1 in anns1:
                        ran_id = random.randint(0, 26000)
                        hand_x = ann1['bbox'][0]
                        hand_y = ann1['bbox'][1]
                        hand_w = ann1['bbox'][2]
                        hand_h = ann1['bbox'][3]
                        temp = img1[hand_y:hand_y + hand_h,
                                    hand_x:hand_x + hand_w]
                        temp_h, temp_w, c = temp.shape
                        src_h, src_w, src_c = img.shape
                        for n in range(100):
                            min_src = min(src_w, src_h)
                            max_temp = max(temp_h, temp_w)
                            if (max_temp > 0.5 * min_src):
                                break
                            if (src_w < temp_w or src_h < temp_h):
                                break
                            x_tmp = random.randint(0, src_w - temp_w)
                            y_tmp = random.randint(0, src_h - temp_h)
                            src_rect = [
                                x_tmp, y_tmp, x_tmp + temp_w, y_tmp + temp_h
                            ]
                            iou_all = 0
                            for gt in anns:
                                gt = [
                                    gt['bbox'][0], gt['bbox'][1],
                                    gt['bbox'][0] + gt['bbox'][2],
                                    gt['bbox'][1] + gt['bbox'][3]
                                ]
                                iou = self.compute_iou(gt, src_rect)
                                iou_all = iou_all + iou
                                # print(iou_all)
                                if iou_all == 0:
                                    img[y_tmp:y_tmp + temp_h,
                                        x_tmp:x_tmp + temp_w] = temp
                                    a = {
                                        'bbox': [x_tmp, y_tmp, temp_w, temp_h],
                                        'category_id': 1
                                    }
                                    anns.append(a)
                                    break
                    num_objs = min(len(anns), self.max_objs)
            else:
                img_id = self.images[index]
                file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
                # daming_dir = "/home/raid5/daming/HandDataMix/TrainImg/AnnImgMix"
                img_path = os.path.join(self.img_dir, file_name)
                # img_path1 = os.path.join(daming_dir, file_name)
                ann_ids = self.coco.getAnnIds(imgIds=[img_id])
                anns = self.coco.loadAnns(ids=ann_ids)
                num_objs = min(len(anns), self.max_objs)
                img = cv2.imread(img_path)
                # daming_num = random.random()
                # if daming_num > 0.5:
                #   img = cv2.imread(img_path)
                # else:
                #   img = cv2.imread(img_path1)

        gray_pro = random.random()
        if gray_pro > 2:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                # s = s * np.random.choice(np.arange(0.3, 1.2, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        iaa_pro = random.random()
        if iaa_pro > 2:
            aug_seq = iaa.Sequential(
                [iaa.MultiplyHueAndSaturation((0.5, 1.5), per_channel=True)])
            #   aug_seq = iaa.Sequential([
            #     iaa.Sometimes(
            #         0.5,
            #         iaa.GaussianBlur(sigma=(0, 0.5))
            #     ),
            #     iaa.LinearContrast((0.75, 1.5)),
            #     iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
            #     iaa.Multiply((0.8, 1.2), per_channel=0.2),
            # ], random_order=True)
            inp, _ = aug_seq(image=inp, bounding_boxes=None)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        # ind is the center index, reg is the offset of center point in extracted feature maps
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            if mosaic_pro > 0:
                ann = all_ann[k]
                bbox = np.array([
                    float(ann[0]),
                    float(ann[1]),
                    float(ann[2]),
                    float(ann[3])
                ],
                                dtype=np.float32)
            else:
                ann = anns[k]
                bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = 0
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                # print("- h : ", h," - w : ", w)
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta

        return ret
Example #8
    def __getitem__(self, index):
        img_path = self.images[index]
        ann_path = img_path.replace('Data', 'Annotations').replace(os.path.splitext(img_path)[-1], '.xml')
        
        anns = []
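        # Parse the Pascal VOC-style XML annotation into [cls, x1, y1, x2, y2]
        # entries, skipping classes that are not in id2idx.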
        root = ET.parse(ann_path).getroot()
        #im_w = int(root.find('size/width').text)
        #im_h = int(root.find('size/height').text)
        for obj in root.findall('object'):
            cls = obj.find('name').text
            if cls not in self.id2idx:
                continue
            cls = self.id2idx[cls]
            x1 = int(obj.find('bndbox/xmin').text)
            y1 = int(obj.find('bndbox/ymin').text)
            x2 = int(obj.find('bndbox/xmax').text)
            y2 = int(obj.find('bndbox/ymax').text)
            #x = 0.5 * (x1 + x2) / im_w
            #y = 0.5 * (y1 + y2) / im_h
            #ww = (x2 - x1) / im_w
            #hh = (y2 - y1) / im_h
            anns.append(np.array([cls, x1, y1, x2, y2], dtype=np.float32))
        #l = np.array(boxes, dtype=np.float32)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
                c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
                s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = ann[1:]
            cls_id = int(ann[0])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_path}
            ret['meta'] = meta
        return ret
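
A note on the `ind` target built above: each object's integer center is stored as a single flattened offset into the output feature map (`y * output_w + x`), which the training code can later use to gather per-object predictions. A minimal NumPy sketch of the round trip (the `encode_center` / `decode_center` names are illustrative, not from the repo):

import numpy as np

def encode_center(ct_int, output_w):
    # Flatten an integer center (x, y) into a single index into an (H, W) map.
    return ct_int[1] * output_w + ct_int[0]

def decode_center(ind, output_w):
    # Recover (x, y) from the flattened index.
    y, x = divmod(int(ind), output_w)
    return x, y

# Example: a center at x=37, y=12 on a 128-wide output map
ind = encode_center((37, 12), 128)       # 12 * 128 + 37 == 1573
assert decode_center(ind, 128) == (37, 12)
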
    def __getitem__(self, index):
        template_name = self.images[index].split('\n')[0]
        img_path = os.path.join(self.img_dir, template_name + '.png')
        anno_path = os.path.join(self.annot_path, template_name + '.txt')
        anns = []
        with open(anno_path, 'r') as f:
            line = f.readline().split()
            while line:
                anns.append([int(line[0]), float(line[1]), float(line[2]),
                             float(line[3]), float(line[4]), float(line[5])])
                line = f.readline().split()
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(64, img.shape[1])
                h_border = self._get_border(64, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        num_ct_classes = self.num_ct_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        lm_heatmaps = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        rm_heatmaps = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        ct_heatmaps = np.zeros((num_ct_classes, output_h, output_w), dtype=np.float32)
        lm_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        rm_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ct_reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        lm_tag = np.zeros((self.max_objs), dtype=np.int64)
        rm_tag = np.zeros((self.max_objs), dtype=np.int64)
        ct_tag = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian


        for k in range(num_objs):
            ann = anns[k]
            width_origin = ann[3] - ann[1]
            cls_id = int(self.cat_ids[ann[0]])

            ann[1:3] = affine_transform(ann[1:3], trans_output)
            ann[3:5] = affine_transform(ann[3:5], trans_output)

            ftl_p, fbl_p, fbr_p, ftr_p = self._bbox_to_points(ann[1:5], ann[5])

            flm_p = np.array([(ftl_p[0] + fbl_p[0]) / 2, (ftl_p[1] + fbl_p[1]) / 2], dtype=np.float32)
            frm_p = np.array([(ftr_p[0] + fbr_p[0]) / 2, (ftr_p[1] + fbr_p[1]) / 2], dtype=np.float32)

            fct_p = np.array([(ftl_p[0] + fbr_p[0]) / 2, (ftl_p[1] + fbr_p[1]) / 2], dtype=np.float32)

            # skip objects whose left/right midpoints fall outside the output map after the affine transform and rotation
            if flm_p[0] < 0 or flm_p[0] > output_w - 1 or flm_p[1] < 0 or flm_p[1] > output_h - 1 or \
               frm_p[0] < 0 or frm_p[0] > output_w - 1 or frm_p[1] < 0 or frm_p[1] > output_h - 1:
                continue

            lm_p = flm_p.astype(np.int32)
            rm_p = frm_p.astype(np.int32)
            ct_p = fct_p.astype(np.int32)

            w = np.sqrt(np.power(flm_p[0] - frm_p[0], 2) + np.power(flm_p[1] - frm_p[1], 2))
            h = w / width_origin * 20.
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius

                draw_gaussian(lm_heatmaps[cls_id], lm_p, radius)
                draw_gaussian(rm_heatmaps[cls_id], rm_p, radius)
                draw_gaussian(ct_heatmaps[0], ct_p, radius)

                lm_tag[k] = lm_p[1] * output_w + lm_p[0]
                rm_tag[k] = rm_p[1] * output_w + rm_p[0]
                ct_tag[k] = ct_p[1] * output_w + ct_p[0]
                # 16383 == 128 * 128 - 1, presumably the last valid flattened index
                # of a 128x128 output map; warn if the center lands outside it.
                if ct_p[1] * output_w + ct_p[0] > 16383:
                    print(img_path)
                    print("Out of upper bound!")
                elif ct_p[1] * output_w + ct_p[0] < 0:
                    print(img_path)
                    print("Out of lower bound!")

                lm_reg[k] = flm_p - lm_p
                rm_reg[k] = frm_p - rm_p
                ct_reg[k] = fct_p - ct_p

                reg_mask[k] = 1

        if (ct_reg > 1).any():
            print("Float precision error!")

        ret = {'input': inp, 'lm': lm_heatmaps, 'rm': rm_heatmaps, 'ct': ct_heatmaps, \
               'lm_tag': lm_tag, 'rm_tag': rm_tag, 'ct_tag': ct_tag, \
               'lm_reg': lm_reg, 'rm_reg': rm_reg, 'ct_reg': ct_reg, 'reg_mask': reg_mask}

        return ret
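
Both of the loaders above splat targets onto heatmaps with `draw_msra_gaussian` / `draw_umich_gaussian`. For reference, here is a sketch of the umich-style helper as it is commonly written in CenterNet-style codebases: it pastes a (2*radius+1)-sized Gaussian at the integer center and keeps the element-wise maximum where objects overlap. The exact helpers imported by these loaders may differ in detail.

import numpy as np

def gaussian2D(shape, sigma=1.0):
    m, n = [(s - 1.) / 2. for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    # Paste a (2*radius+1)^2 Gaussian onto `heatmap` at `center`, clipped to the borders.
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6.)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap
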
Example #10
  def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)



    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)

    num_objs = min(len(anns), self.max_objs)

    channel_counter = 1  # len(self.coco.getCatIds())

    # target
    # target_img = cv2.imread(img_path)

    N_FRAMES = 11
    middle = int(N_FRAMES/2)
    index = os.path.basename(img_path).replace('.jpg', '').replace('img', '').replace('.JPEG', '')
    rest = img_path.replace(index + '.jpg', '').replace(os.path.dirname(img_path), '')
    length = len(index)
    modulo = '1'
    for i in range(length):
      modulo += '0'
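    # modulo is now 10**len(index) as a string; the loop below builds paths for
    # N_FRAMES consecutive frames centered on the current one (indices wrap modulo
    # that value, and missing frames fall back to the current frame).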

    img_paths = []
    for i in range(N_FRAMES):
      new_img_path = os.path.dirname(img_path) \
                     + rest \
                     + str((int(index) - (i-middle)) % int(modulo)).zfill(length) + '.jpg'
      if not os.path.exists(new_img_path):
        new_img_path = img_path
      img_paths.append(new_img_path)

    imgs = []
    for path in img_paths:
      imgs.append(cv2.imread(path))
    img = np.concatenate(imgs, -1)

    bboxes = {}
    for ann in anns:
      # convert COCO [x, y, w, h] to [x1, y1, x2, y2] and group boxes by category id
      box = [int(ann['bbox'][0]),
             int(ann['bbox'][1]),
             int(ann['bbox'][0] + ann['bbox'][2]),
             int(ann['bbox'][1] + ann['bbox'][3])]
      bboxes.setdefault(str(ann['category_id']), []).append(box)
    seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]])
    for label in range(1, channel_counter+1):
      if str(label) in bboxes:
        for bbox in bboxes[str(label)]:
          seg_img[label-1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255


    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift
        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True

        # target
        # target_img = target_img[:, ::-1, :]

        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1


    trans_input = get_affine_transform(
      c, s, 0, [input_w, input_h])
    # cv2.warpAffine returns arrays of shape (input_h, input_w), so allocate (C, H, W)
    seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))
    for channel in range(seg_img.shape[0]):
      seg_inp[channel, :, :] = cv2.warpAffine(seg_img[channel, :, :], trans_input,
                                              (input_w, input_h),
                                              flags=cv2.INTER_LINEAR)
    # print('pre: ', img.shape)
    # target
    # target_inp = cv2.warpAffine(target_img, trans_input,(input_w, input_h),flags=cv2.INTER_LINEAR)

    # Stack the warped frames channel-wise; each warpAffine output is (input_h, input_w, 3)
    inp = np.zeros((input_h, input_w, N_FRAMES*3))
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        inp[:, :, i*3:i*3+3] = cv2.warpAffine(img[:, :, i*3:i*3+3], trans_input,
                              (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
    # print('post: ', inp.shape)
    # target
    # target_inp = (target_inp.astype(np.float32) / 255.)

    inp = (inp.astype(np.float32) / 255.)

    seg_inp = (seg_inp.astype(np.float32) / 255.) # hughes


    # print('np.mean(inp), PRE: ', np.mean(inp))
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        if self.split == 'train' and not self.opt.no_color_aug:
          color_aug(self._data_rng, inp[:, :, i*3:i*3+3], self._eig_val, self._eig_vec)
    else:
      if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    # target
    # color_aug(self._data_rng, target_inp, self._eig_val, self._eig_vec)
    
    # print('np.mean(inp), POST: ', np.mean(inp))
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        inp[:, :, i*3:i*3+3] = (inp[:, :, i*3:i*3+3] - self.mean) / self.std
    else:
      inp = (inp - self.mean) / self.std
    # target
    # target_inp = (target_inp - self.mean) / self.std

    inp = inp.transpose(2, 0, 1)

    # target
    # target_inp = target_inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      if h > 0 and w > 0:
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        ct = np.array(
          [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    # write_hm = cv2.resize(((hm-np.min(hm)/np.max(hm))*255).astype(np.uint8).squeeze(0), (512, 512))
    # cv2.imwrite('/store/datasets/UA-Detrac/test_sample/VID_HM/'  + 'inp_' + os.path.basename(file_name) + '_' + 'HM.jpg', write_hm)

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'seg': seg_inp}  # 'seg': np.expand_dims(seg_inp, 0)}
    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta

    # if inp.shape[0] == N_FRAMES*3:
    #   for i in range(N_FRAMES):
    #     img_test = (inp[i*3:i*3+3, :, :].transpose(1, 2, 0) * 255).astype(np.uint8)
    #     cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/test_sample/VID_HM/", 'inp_' + os.path.basename(file_name) + '_' + str(i)), img_test)

    #img_test = (target_inp.transpose(1, 2, 0) * 255).astype(np.uint8)
    # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/exp/tensors/VID_HM/", os.path.basename(file_name) + '_target'), img_test)

    # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), (seg_inp.transpose(1, 2, 0) * 255).astype(np.uint8))

    # exit()
    return ret
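
When `opt.dense_wh` is enabled, the loader above swaps the sparse `wh` target for a dense 2-channel map plus `dense_wh_mask`, the per-pixel heatmap maximum duplicated over both channels. A plausible way a training loss consumes that pair is a mask-weighted L1, sketched below under that assumption (the repo's actual loss may normalize differently):

import numpy as np

def dense_wh_l1(pred_wh, gt_dense_wh, dense_wh_mask, eps=1e-4):
    # pred_wh, gt_dense_wh: (2, H, W); dense_wh_mask: (2, H, W) with values in [0, 1].
    # Pixels far from any object center get near-zero weight.
    weighted = np.abs(pred_wh - gt_dense_wh) * dense_wh_mask
    return weighted.sum() / (dense_wh_mask.sum() + eps)
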
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), cfg.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        if self.split == 'train':
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = cfg.train_resolution[0], cfg.train_resolution[1]
        else:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)

        flipped = False
        if self.split == 'train':
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = get_border(128, img.shape[1])
            h_border = get_border(128, img.shape[0])

            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_matrix = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_matrix, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = inp.astype(np.float32) / 255.

        # TODO: inp can contain values below 0 after color_aug
        if self.split == 'train':
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - cfg.mean) / cfg.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // cfg.down_ratio
        output_w = input_w // cfg.down_ratio
        trans_matrix = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((self.num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        ind = np.zeros(cfg.max_objs, dtype=np.int64)
        reg_mask = np.zeros(cfg.max_objs, dtype=np.uint8)

        gt_box = []
        for i in range(num_objs):
            ann = anns[i]
            bbox = coco2x1y1x2y2(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_matrix)
            bbox[2:] = affine_transform(bbox[2:], trans_matrix)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if h > 0 and w > 0:
                # get an object size-adapative radius
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                draw_umich_gaussian(hm[cls_id], ct_int, radius)

                wh[i] = 1. * w, 1. * h
                ind[i] = ct_int[1] * output_w + ct_int[0]
                reg[i] = ct - ct_int
                reg_mask[i] = 1

                gt_box.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg': reg,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        if self.opt.debug > 0 or not self.split == 'train':
            gt_box = np.array(
                gt_box, dtype=np.float32) if len(gt_box) > 0 else np.zeros(
                    (1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_box, 'img_id': img_id}
            ret['meta'] = meta
        return ret
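
`coco2x1y1x2y2` above (like the `_coco_box_to_bbox` helpers in the other examples) converts COCO's `[x, y, w, h]` annotation into corner coordinates before the affine transform is applied. A minimal version, assuming only that convention:

import numpy as np

def coco2x1y1x2y2(box):
    # COCO stores [x_min, y_min, width, height]; return [x1, y1, x2, y2].
    x, y, w, h = box
    return np.array([x, y, x + w, y + h], dtype=np.float32)

# Example: a 50x20 box whose top-left corner is at (10, 30)
print(coco2x1y1x2y2([10, 30, 50, 20]))   # [10. 30. 60. 50.]
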
Example #12
    def __getitem__(self, index):
        img_id = self.images[index]
        if img_id < 7481:
            img_id_r = img_id + 7481
        else:
            img_id_r = img_id - 7481
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        file_name_r = "{:06d}".format(img_id_r) + '.png'
        img_path = os.path.join(self.img_dir, file_name)
        img_path_r = os.path.join(self.img_dir, file_name_r)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)
        img_r = cv2.imread(img_path_r)
        img_shape = img.shape[:2]

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0
        c_r = np.array([img_r.shape[1] / 2., img_r.shape[0] / 2.],
                       dtype=np.float32)
        s_r = max(img_r.shape[0], img_r.shape[1]) * 1.0

        trans_input_l = get_affine_transform(
            c, s, rot, [self.opt.input_w, self.opt.input_h])
        trans_input_r = get_affine_transform(
            c_r, s_r, rot, [self.opt.input_w, self.opt.input_h])

        inp = cv2.warpAffine(
            img,
            trans_input_l,
            (self.opt.input_w, self.opt.input_h),
            #(self.opt.input_res, self.opt.input_res),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        inp_r = cv2.warpAffine(
            img_r,
            trans_input_r,
            (self.opt.input_w, self.opt.input_h),
            # (self.opt.input_res, self.opt.input_res),
            flags=cv2.INTER_LINEAR)
        inp_r = (inp_r.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp_r, self._eig_val, self._eig_vec)
        inp_r = (inp_r - self.mean) / self.std
        inp_r = inp_r.transpose(2, 0, 1)
        trans_output_l = np.zeros((self.opt.pynum, 2, 3), dtype=np.float32)
        trans_output_r = np.zeros((self.opt.pynum, 2, 3), dtype=np.float32)
        for j in range(self.opt.pynum):
            down_ratio = math.pow(2, j + 1)
            trans_output_l[j, :, :] = get_affine_transform(
                c, s, rot, [
                    self.opt.input_w // down_ratio,
                    self.opt.input_h // down_ratio
                ])
            trans_output_r[j, :, :] = get_affine_transform(
                c_r, s_r, rot, [
                    self.opt.input_w // down_ratio,
                    self.opt.input_h // down_ratio
                ])
        dim = np.zeros((self.max_objs, 3), dtype=np.float32)
        ori = np.zeros((self.max_objs), dtype=np.float32)
        pos = np.zeros((self.max_objs, 3), dtype=np.float32)

        dim_real = np.zeros((self.max_objs, 3), dtype=np.float32)
        ori_real = np.zeros((self.max_objs), dtype=np.float32)
        pos_real = np.zeros((self.max_objs, 3), dtype=np.float32)

        dim_est = np.zeros((self.max_objs, 3), dtype=np.float32)
        ori_est = np.zeros((self.max_objs, 3, 3), dtype=np.float32)
        ori_est_scalar = np.zeros((self.max_objs), dtype=np.float32)
        pos_est = np.zeros((self.max_objs, 3), dtype=np.float32)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        calib = np.array(anns[0]['calib_l'], dtype=np.float32)
        calib = np.reshape(calib, (3, 4))
        calib_r = np.array(anns[0]['calib_r'], dtype=np.float32)
        calib_r = np.reshape(calib_r, (3, 4))
        if self.split == 'val':
            for k in range(self.max_objs):
                if k + 1 > num_objs:
                    kk = random.randint(0, num_objs - 1)
                    ann = anns[kk]
                else:

                    ann = anns[k]
                reg_mask[k] = 1
                dim_est[k][0] = ann['dim'][0] + random.uniform(-0.8, 0.8)
                dim_est[k][1] = ann['dim'][1] + random.uniform(-0.8, 0.8)
                dim_est[k][2] = ann['dim'][2] + random.uniform(-0.8, 0.8)
                ori[k] = random.uniform(-0.3, 0.3)
                ori_est_scalar[k] = ann['rotation_y'] - ori[k]
                ori_est[k] = self.E2R(ann['rotation_y'] - ori[k])
                pos_est[k][0] = ann['location'][0] + random.uniform(-1, 1)
                pos_est[k][1] = ann['location'][1] + random.uniform(-0.5, 0.5)
                pos_est[k][2] = ann['location'][2] + random.uniform(-2, 2)

                dim[k][0] = ann['dim'][0] - dim_est[k][0]
                dim[k][1] = ann['dim'][1] - dim_est[k][1]
                dim[k][2] = ann['dim'][2] - dim_est[k][2]

                pos[k][0] = ann['location'][0] - pos_est[k][0]
                pos[k][1] = ann['location'][1] - pos_est[k][1]
                pos[k][2] = ann['location'][2] - pos_est[k][2]

                dim_real[k][0] = ann['dim'][0]
                dim_real[k][1] = ann['dim'][1]
                dim_real[k][2] = ann['dim'][2]

                pos_real[k][0] = ann['location'][0]
                pos_real[k][1] = ann['location'][1]
                pos_real[k][2] = ann['location'][2]
                ori_real[k] = ann['rotation_y']
        if self.split == 'train':
            for k in range(self.max_objs):
                if k + 1 > num_objs:
                    kk = random.randint(0, num_objs - 1)
                    ann = anns[kk]
                else:
                    ann = anns[k]
                reg_mask[k] = 1
                if np.random.random() < 0.7:
                    dim_est[k][0] = ann['dim'][0] + random.uniform(-1.5, 1.5)
                    dim_est[k][1] = ann['dim'][1] + random.uniform(-1.5, 1.5)
                    dim_est[k][2] = ann['dim'][2] + random.uniform(-1.5, 1.5)
                    ori[k] = random.uniform(-0.6, 0.6)
                    ori_est_scalar[k] = ann['rotation_y'] - ori[k]
                    ori_est[k] = self.E2R(ann['rotation_y'] - ori[k])
                    pos_est[k][0] = ann['location'][0] + random.uniform(-2, 2)
                    pos_est[k][1] = ann['location'][1] + random.uniform(
                        -0.8, 0.8)
                    pos_est[k][2] = ann['location'][2] + random.uniform(-3, 3)
                else:
                    dim_est[k][0] = ann['dim'][0] + random.uniform(-0.5, 0.5)
                    dim_est[k][1] = ann['dim'][1] + random.uniform(-0.5, 0.5)
                    dim_est[k][2] = ann['dim'][2] + random.uniform(-0.5, 0.5)
                    ori[k] = random.uniform(-0.3, 0.3)
                    ori_est_scalar[k] = ann['rotation_y'] - ori[k]
                    ori_est[k] = self.E2R(ann['rotation_y'] - ori[k])
                    pos_est[k][0] = ann['location'][0] + random.uniform(
                        -0.8, 0.8)
                    pos_est[k][1] = ann['location'][1] + random.uniform(
                        -0.3, 0.3)
                    pos_est[k][2] = ann['location'][2] + random.uniform(-1, 1)
                dim[k][0] = ann['dim'][0] - dim_est[k][0]
                dim[k][1] = ann['dim'][1] - dim_est[k][1]
                dim[k][2] = ann['dim'][2] - dim_est[k][2]

                pos[k][0] = ann['location'][0] - pos_est[k][0]
                pos[k][1] = ann['location'][1] - pos_est[k][1]
                pos[k][2] = ann['location'][2] - pos_est[k][2]

                dim_real[k][0] = ann['dim'][0]
                dim_real[k][1] = ann['dim'][1]
                dim_real[k][2] = ann['dim'][2]

                pos_real[k][0] = ann['location'][0]
                pos_real[k][1] = ann['location'][1]
                pos_real[k][2] = ann['location'][2]
                ori_real[k] = ann['rotation_y']

            #reg_mask[k]=1

        meta = {}
        meta['img_shape'] = img_shape
        meta['num_objs'] = num_objs
        meta['img_name'] = file_name
        ret = {
            'input': inp,
            'input_r': inp_r,
            'dim': dim,
            'ori': ori,
            'pos': pos,
            'dim_real': dim_real,
            'ori_real': ori_real,
            'pos_real': pos_real,
            'dim_est': dim_est,
            'ori_est': ori_est,
            'pos_est': pos_est,
            'ori_est_scalar': ori_est_scalar,
            'calib_l': calib,
            'calib_r': calib_r,
            'trans_output_l': trans_output_l,
            'trans_output_r': trans_output_r,
            'reg_mask': reg_mask,
            'meta': meta
        }
        return ret
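
`self.E2R` turns the annotated KITTI `rotation_y` (yaw about the camera's vertical axis) into a 3x3 rotation matrix before it is stored in `ori_est`. Assuming the usual KITTI camera convention, a sketch of such a helper follows; the repo's version may choose a different axis order or sign:

import numpy as np

def E2R(rotation_y):
    # Rotation about the Y (vertical) axis of the camera frame by `rotation_y` radians.
    c, s = np.cos(rotation_y), np.sin(rotation_y)
    return np.array([[ c, 0., s],
                     [0., 1., 0.],
                     [-s, 0., c]], dtype=np.float32)
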
Example #13
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        # (filepath, tempfilename) = os.path.split(img_path)
        # (filename, extension) = os.path.splitext(tempfilename)
        # kps_path = os.path.join('/media/srt/resource/Halcon_Project/L_shelf_dataset/HG_Dataset/kps',
        #                         filename + '_kps.npy')
        # kps_ann = np.load(kps_path)
        # print('load the kps!!!', kps_path)
        # c3= np.ones(6)
        # kps=np.column_stack((kps_ann,c3))
        # print(kps)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # print('Random Crop Done')
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
                # print the scale value for debugging
                # print('s is :',s)
            else:
                # print('Do not Random Crop')
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # apply the affine transform to the input image
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # save inp for visualization
        # print('type of inp is:',type(inp))
        # print('size of inp is:', inp.shape)
        # for a 3-channel image, taking a single channel is enough for visualization
        test_image = inp[1]

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # save the Gaussian heatmap for visualization
                # print('shape of hp: ',hm.shape)
                heatmap = np.squeeze(hm)
                heatmap = cv2.resize(heatmap, (960, 640),
                                     interpolation=cv2.INTER_CUBIC)
                new_image = test_image + heatmap * 2
                array_name = 'visual_ann_' + str(index) + '.png'
                matplotlib.image.imsave(array_name, new_image)

                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
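
Every loader here pushes box corners through `affine_transform(pt, trans_output)` after building a 2x3 matrix with `get_affine_transform`. The point transform itself is just a homogeneous matrix-vector product; the sketch below matches how it is usually written in CenterNet-style utilities (shown with a hand-built shift matrix rather than `get_affine_transform`):

import numpy as np

def affine_transform(pt, t):
    # t is a 2x3 affine matrix (as produced by get_affine_transform / cv2).
    new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]

# Example: translate a point by (5, -3) with an identity-plus-shift matrix
t = np.array([[1., 0., 5.],
              [0., 1., -3.]], dtype=np.float32)
print(affine_transform((10., 10.), t))   # [15.  7.]
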
Example #14
    def __getitem__(self, index):

        ######################################## Start of modified Code Block #####################################################
        curr_example = self.all_frames[index]
        img_path = curr_example[0]
        anns = curr_example[1]
        ######################################## End of modified Code Block #####################################################

        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        try:
            img_shape = img.shape
            self.last_img = img
        except AttributeError:
            print("Image '{}' failed!!!".format(img_path))
            self.failed_images.add(img_path)
            img = self.last_img

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:  # this is the default!
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        # GEO: we need to calculate the mean and std in datasets/dataset/gaila.py
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = len(list(self.cat_ids.keys()))
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ######################################## Start of modified Code Block #####################################################
            ann = anns.iloc[k]
            bbox = np.asarray([
                ann["topLeftX"], ann["topLeftY"], ann['bottomRightX'],
                ann['bottomRightY']
            ],
                              dtype=np.float32)
            cls_id = int(self.cat_ids[ann['name']])
            ######################################## End of modified Code Block #####################################################

            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)

            ######################################## Start of modified Code Block #####################################################
            _id = int(img_path.split('/')[-1].split('.')[0])
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': _id}
            ######################################## End of modified Code Block #####################################################

            ret['meta'] = meta
        return ret
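
The `cat_spec_wh` / `cat_spec_mask` targets above reserve two slots per class for every object, so only the (w, h) pair of the ground-truth class is supervised. A small sketch of how a loss could use that layout, assuming the flattened `(max_objs, num_classes * 2)` arrays built in this loader (`masked_cat_spec_l1` is an illustrative name, not the repo's loss):

import numpy as np

def masked_cat_spec_l1(pred, cat_spec_wh, cat_spec_mask, eps=1e-4):
    # pred, cat_spec_wh: (max_objs, num_classes * 2); cat_spec_mask: same shape, 0/1.
    # Only the two slots of each object's ground-truth class contribute.
    diff = np.abs(pred - cat_spec_wh) * cat_spec_mask
    return diff.sum() / (cat_spec_mask.sum() + eps)

max_objs, num_classes = 4, 3
wh_target = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
mask = np.zeros_like(wh_target)
cls_id, k = 1, 0                      # first object belongs to class 1
wh_target[k, cls_id * 2: cls_id * 2 + 2] = (12., 7.)
mask[k, cls_id * 2: cls_id * 2 + 2] = 1.
print(masked_cat_spec_l1(np.zeros_like(wh_target), wh_target, mask))   # ~9.5
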
    def __getitem__(self, index):
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img['width'])
        h_img = int(img['height'])

        labels = []
        bboxes = []
        a_bboxes = []
        shapes = []
        a_shapes = []

        for anno in annotations:
            if anno['category_id'] not in KINS_IDS:
                continue  # excludes 3: person-sitting class for evaluation

            a_polygons = anno['segmentation'][
                0]  # only one mask for each instance
            polygons = anno['i_segm'][0]

            # gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox']  # this is used to clip resampled polygons
            a_contour = np.array(a_polygons).reshape((-1, 2))
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if cv2.contourArea(contour.astype(
                    np.int32)) < 5:  # remove tiny objects
                continue
            fixed_contour = uniformsample(a_contour, self.n_vertices)
            i_contour = uniformsample(contour, self.n_vertices)

            # fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            # fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            # contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # if contour_std < 1e-6 or contour_std == np.inf or contour_std == np.nan:  # invalid shapes
            #     continue

            shapes.append(np.ndarray.flatten(i_contour).tolist())
            a_shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])
            a_bboxes.append(anno['a_bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        a_bboxes = np.array(a_bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)
        a_shapes = np.array(a_shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            a_bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
            a_shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
        a_bboxes[:, 2:] += a_bboxes[:, :2]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(360, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = img.copy()

        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])
        # -----------------------------------debug---------------------------------
        # image_show = cv2.warpAffine(image_show, trans_fmap, (self.fmap_size['w'], self.fmap_size['h']))

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap of centers
        occ_map = np.zeros(
            (1, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # grayscale map for occlusion levels
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of inmodal bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt amodal coefficients
        regs = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # regression for quantization error
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        votes_ = np.zeros((self.max_objs, self.vote_length),
                          dtype=np.float32)  # voting for heatmaps

        for k, (bbox, a_bbox, label, shape, a_shape) in enumerate(
                zip(bboxes, a_bboxes, labels, shapes, a_shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                a_bbox[[0, 2]] = width - a_bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    a_shape[2 * m] = width - a_shape[2 * m] - 1
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[
                0]  # This box is the inmodal boxes

            a_bbox[:2] = affine_transform(a_bbox[:2], trans_fmap)
            a_bbox[2:] = affine_transform(a_bbox[2:], trans_fmap)
            a_bbox[[0, 2]] = np.clip(a_bbox[[0, 2]], 0,
                                     self.fmap_size['w'] - 1)
            a_bbox[[1, 3]] = np.clip(a_bbox[[1, 3]], 0,
                                     self.fmap_size['h'] - 1)

            # generate gt shape mean and std from contours
            for m in range(self.n_vertices
                           ):  # apply scale and crop transform to shapes
                a_shape[2 * m:2 * m + 2] = affine_transform(
                    a_shape[2 * m:2 * m + 2], trans_fmap)
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(a_shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            i_shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            i_shape_clipped[:, 0] = np.clip(i_shape_clipped[:, 0], 0,
                                            self.fmap_size['w'] - 1)
            i_shape_clipped[:, 1] = np.clip(i_shape_clipped[:, 1], 0,
                                            self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            centered_shape = indexed_shape - mass_center  # these are amodal mask shapes

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = centered_shape.reshape((1, -1))

                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)

                a_shifted_poly = indexed_shape - np.array([
                    a_bbox[0], a_bbox[1]
                ])  # crop amodal shapes to the amodal bboxes
                amodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(a_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                i_shifted_poly = i_shape_clipped - np.array([
                    a_bbox[0], a_bbox[1]
                ])  # crop inmodal shapes to the same amodal bboxes
                inmodal_obj_mask = self.polys_to_mask(
                    [np.ndarray.flatten(i_shifted_poly, order='C').tolist()],
                    a_bbox[3], a_bbox[2])

                obj_mask = (
                    amodal_obj_mask + inmodal_obj_mask
                ) * 255. / 2  # convert to float type in image scale
                obj_mask = cv2.resize(
                    obj_mask.astype(np.uint8),
                    dsize=(self.vote_vec_dim, self.vote_vec_dim),
                    interpolation=cv2.INTER_LINEAR) * 1.
                votes_[k] = obj_mask.reshape((1, -1)) / 255.

                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

                # occlusion level map gt
                occ_map[0] += self.polys_to_mask(
                    [np.ndarray.flatten(indexed_shape).tolist()],
                    self.fmap_size['h'], self.fmap_size['w']) * 1.

        occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ

        # -----------------------------------debug---------------------------------
        # for bbox, label, shape in zip(bboxes, labels, shapes_):
        #     # cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)
        #     cv2.putText(image_show, str(self.reverse_labels[label]), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        #     # print(shape, shape.shape)
        #     cv2.polylines(image_show, [shape.reshape(self.n_vertices, 2).astype(np.int32)], True, (0, 0, 255),
        #                   thickness=1)
        # # cv2.imshow('img', image_show)
        # # cv2.imshow('occ', occ_map.astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]) * 255)
        # m_img = cv2.cvtColor((occ_map * 255).astype(np.uint8).reshape(occ_map.shape[1], occ_map.shape[2]),
        #                      code=cv2.COLOR_GRAY2BGR)
        # cat_img = np.concatenate([m_img, image_show], axis=0)
        # cv2.imshow('segm', cat_img)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'occ_map': occ_map,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'votes': votes_,
            'c': center,
            's': scale,
            'img_id': img_id
        }
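Across these loaders the per-object targets (w_h_, regs, codes_, votes_, ...) are written at flat feature-map indices inds = y * fmap_w + x, with ind_masks marking the valid slots. As an illustration of that indexing convention only (not this repository's loss code), here is a minimal NumPy sketch of gathering a dense prediction map at those annotated centers:

import numpy as np

def gather_at_indices(pred_map, inds, ind_masks):
    """Gather per-object predictions from a dense map (sketch).

    pred_map : (C, H, W) dense regression output (e.g. a predicted w/h map).
    inds     : (max_objs,) flat indices, ind = y * W + x.
    ind_masks: (max_objs,) 1 for valid object slots, 0 for padding.
    Returns a (num_valid, C) array of predictions at the annotated centers.
    """
    C, H, W = pred_map.shape
    flat = pred_map.reshape(C, H * W)           # (C, H*W)
    gathered = flat[:, inds].T                  # (max_objs, C)
    return gathered[ind_masks.astype(bool)]     # keep only valid slots

# tiny usage example with the same shapes the loader produces
W, H, max_objs = 128, 128, 100
pred = np.random.randn(2, H, W).astype(np.float32)
inds = np.zeros(max_objs, dtype=np.int64)
masks = np.zeros(max_objs, dtype=np.uint8)
inds[0], masks[0] = 40 * W + 17, 1              # one object at (x=17, y=40)
print(gather_at_indices(pred, inds, masks).shape)   # -> (1, 2)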
Example #16
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']  # image file name
        img_path = os.path.join(self.img_dir, file_name)  # full image path
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)  # GT boxes for this image

        num_objs = min(len(anns), self.max_objs)
        # Read the image and preprocess it
        # print(img_id, img_path)
        img = cv2.imread(img_path)
        # import pdb
        # pdb.set_trace()
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train' or self.split == 'debug1':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.7, 1.3, 0.1))
                w_border = self._get_border(512, img.shape[1])
                h_border = self._get_border(512, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
                # c[0] = np.random.randint(low=0.4*img.shape[1], high=0.6*img.shape[1] )
                # c[1] = np.random.randint(low=0.4*img.shape[0], high=0.6*img.shape[0])
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # Build the affine transform from the jittered c and s; all boxes are later moved with the same transform
        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        # scale pixels from 0-255 to 0-1
        if DEBUG:
            raw_img = inp.copy()
        inp = (inp.astype(np.float32) / 255.)
        # color jitter
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        # subtract mean, divide by std
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # image preprocessing done

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)  # heatmap

        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)  # dense wh map
        angle = np.zeros((self.max_objs, 1), dtype=np.float32)
        dense_angle = np.zeros((1, output_h, output_w), dtype=np.float32)  # dense angle map
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)  # sub-pixel offset
        ind = np.zeros((self.max_objs), dtype=np.int64)  # flat index of each object on the feature map, from its center coords
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)  # 0 for objects that vanish after the transform
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)  # per-class width/height
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)  # per-class width/height mask
        cat_spec_angle = np.zeros((self.max_objs, num_classes), dtype=np.float32)  # per-class angle
        cat_spec_angle_mask = np.zeros((self.max_objs, num_classes), dtype=np.uint8)  # per-class angle mask

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        # iterate over all objects
        for k in range(num_objs):
            ann = anns[k]
            # bbox = self._coco_box_to_bbox(ann['rbbox'])
            bbox = ann['rbbox']
            cls_id = int(self.cat_ids[ann['category_id']])

            # Apply the same transform to the box as to the image
            if flipped:
                # mirror cx
                bbox[0] = width - bbox[0] - 1

            # get the four corner points
            pt1, pt2, pt3, pt4 = self._get_four_points((bbox[0], bbox[1]), bbox[-1], bbox[2], bbox[3])
            pt1 = affine_transform((pt1[0, 0], pt1[0, 1]), trans_output)
            pt2 = affine_transform((pt2[0, 0], pt2[0, 1]), trans_output)
            pt3 = affine_transform((pt3[0, 0], pt3[0, 1]), trans_output)
            pt4 = affine_transform((pt4[0, 0], pt4[0, 1]), trans_output)

            # compute the center point, width/height and angle
            ct = np.array(
                [(pt1[0] + pt3[0]) / 2, (pt1[1] + pt3[1]) / 2], dtype=np.float32)
            w = np.linalg.norm(pt1 - pt2)
            h = np.linalg.norm(pt1 - pt4)
            # compute the new angle
            # vec_base = np.array([0, 1], dtype=np.float32)
            # vec_angle = np.array([(pt1[0] + pt2[0]) / 2, (pt1[1] + pt2[1]) / 2], dtype=np.float32) - ct
            # norm_base = np.linalg.norm(vec_base)
            # norm_angle = np.linalg.norm(vec_angle)
            # cos_angle = vec_base.dot(vec_angle) / (norm_base * norm_angle + np.finfo(float).eps)
            # a = np.arccos(cos_angle)

            if self.opt.dataset == 'hrsc':
                a = bbox[-1]
                if flipped:
                    a = np.pi - a
            elif self.opt.dataset == 'dota':
                a = bbox[-1]
                # ####### angles in the dota json are in [0, 2*pi] ##########
                if flipped:
                    a = 2 * np.pi - a
            elif self.opt.dataset == 'rosku':
                # ####### angles in the rosku json are in [-0.5*pi, 0.5*pi] ##########
                a = bbox[-1] / math.pi
                if flipped:
                    a = -1 * a
                a = np.clip(a, -0.5, 0.5)
                a = a + 0.5
            else:
                raise Exception('Wrong dataset.')

            if DEBUG:
                color = [255, 0, 0]
                line_width = 2
                # ####### angles in the rosku json are in [-0.5*pi, 0.5*pi] ##########
                # temp_a = (a - 0.5) * math.pi
                temp_a = a
                npt1, npt2, npt3, npt4 = self._get_four_points((ct[0], ct[1]), temp_a, w, h)
                npt1 = self._float_to_int(npt1)
                npt2 = self._float_to_int(npt2)
                npt3 = self._float_to_int(npt3)
                npt4 = self._float_to_int(npt4)
                cv2.line(raw_img, npt1, npt2, color, line_width)
                cv2.line(raw_img, npt2, npt3, color, line_width)
                cv2.line(raw_img, npt3, npt4, color, line_width)
                cv2.line(raw_img, npt4, npt1, color, line_width)


            if 0 <= ct[0] <= output_w - 1 and 0 <= ct[1] <= output_h - 1:
                # heatmap: spread the GT center with a Gaussian
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct_int = ct.astype(np.int32)
                # draw the GT at the center point
                draw_gaussian(hm[cls_id], ct_int, radius)

                wh[k] = 1. * w, 1. * h
                angle[k] = 1. * a
                ind[k] = ct_int[1] * output_w + ct_int[0]  # flat index of the object on the feature map
                reg[k] = ct - ct_int  # offset between the real-valued and rounded center
                reg_mask[k] = 1
                # wh targets
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

                # angle targets
                cat_spec_angle[k, cls_id] = angle[k]
                cat_spec_angle_mask[k, cls_id] = 1
                if self.opt.dense_angle or self.opt.fsm:
                    draw_dense_reg(dense_angle, hm.max(axis=0), ct_int, angle[k], radius)
                # ang_radius = max(int(1.0), int(radius/2.))
                # draw_dense_reg_uni(dense_angle[0, :], ct_int, angle[k], ang_radius)
                gt_det.append([ct[0], ct[1], w, h, angle[k], 1, cls_id])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'angle': angle}

        # wh
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']

        # angle
        if self.opt.dense_angle or self.opt.fsm:
            dense_angle_mask = hm.max(axis=0, keepdims=True)
            ret.update({'dense_angle': dense_angle, 'dense_angle_mask': dense_angle_mask})
            if self.opt.dense_angle:
                del ret['angle']
        elif self.opt.cat_spec_angle:
            ret.update({'cat_spec_angle': cat_spec_angle, 'cat_spec_angle_mask': cat_spec_angle_mask})
            del ret['angle']

        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 7), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id, 'img_name':file_name}
            ret['meta'] = meta

        if DEBUG:
            ret['raw_img'] = raw_img
            ret['gt_det'] = gt_det
            ret['img_id'] = img_id
            cv2.imwrite(os.path.join('./cache', '%s.jpg' % img_id), raw_img)
        return ret
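The angle handling above is dataset-specific: hrsc and dota keep the angle in radians (mirrored under a horizontal flip), while rosku rescales [-0.5*pi, 0.5*pi] into a [0, 1] target. A hedged sketch of the matching decode step (mapping a predicted angle target back to radians) follows; the function name and the assumption that the rosku target stays in [0, 1] are inferred from the encoding code above, not taken from the repository.

import math
import numpy as np

def decode_angle(a_pred, dataset):
    """Inverse of the per-dataset angle encoding used in __getitem__ (sketch).

    hrsc / dota : the target is already the angle in radians.
    rosku       : a target in [0, 1] maps back to [-0.5*pi, 0.5*pi].
    """
    if dataset in ('hrsc', 'dota'):
        return a_pred
    if dataset == 'rosku':
        a = np.clip(a_pred, 0.0, 1.0) - 0.5   # back to [-0.5, 0.5]
        return a * math.pi                    # back to radians
    raise ValueError(f'Unknown dataset: {dataset}')

# e.g. a rosku prediction of 0.75 decodes to +pi/4
assert abs(decode_angle(0.75, 'rosku') - math.pi / 4) < 1e-6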
    def __getitem__(self, index):
        img_id = self.images[index]
        img_path = self.data_dir + f"/JPEGImages/{img_id}.jpg"

        if self.has_landmark == 1:
            anns = self._get_annotation_lm(img_id)
        else:
            anns = self._get_annotation(img_id)

        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kpts_reg = np.zeros((self.max_objs, 10), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        # kpts_mask: keypoints are regressed directly (not via a heatmap)
        kpts_mask = np.zeros((self.max_objs, 10), dtype=np.uint8)

        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            box, landmarks, label = anns[k]
            bbox = np.array(box, dtype=np.float32)
            lm = np.array(landmarks, dtype=np.float32)
            cls_id = int(self.cat_ids[label])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                flag_lm = 0
                for idx in range(10):
                    flag_lm += lm[idx]
                if flag_lm > 1:
                    for idx in range(0, 10, 2):
                        lm[idx:idx + 2] = affine_transform(
                            lm[idx:idx + 2], trans_output)
                        if lm[idx] >= 0 and lm[idx] < output_w and \
                                lm[idx + 1] >= 0 and lm[idx + 1]<output_h:
                            kpts_mask[k, idx:idx + 2] = 1
                            kpts_reg[k][idx] = (lm[idx] - ct_int[0])
                            kpts_reg[k][idx + 1] = (lm[idx + 1] - ct_int[1])

                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.kpts_reg:  # add keypoint regression targets
            ret.update({'kpts_reg': kpts_reg})
            ret.update({'kpts_mask': kpts_mask})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
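In the example above, kpts_reg stores each object's five landmarks as (dx, dy) offsets from the integer box center on the output feature map, and kpts_mask flags the visible ones. The sketch below shows one plausible way to invert that encoding; the names and the assumption opt.down_ratio == 4 are illustrative, and mapping back to the original image would additionally need the inverse affine transform.

import numpy as np

def decode_landmarks(kpts_reg, ct_int, kpts_mask, down_ratio=4):
    """Recover landmark positions from center-relative offsets (sketch).

    kpts_reg : (10,) offsets [dx0, dy0, ..., dx4, dy4] on the output map.
    ct_int   : (2,) integer center (x, y) on the output map.
    kpts_mask: (10,) visibility flags written by the loader.
    Returns a (5, 2) array in input (warped) image pixels; hidden points are NaN.
    """
    pts = kpts_reg.reshape(5, 2) + ct_int[None, :]     # back to feature-map coords
    pts = pts * down_ratio                             # back to input pixels (assumed stride)
    visible = kpts_mask.reshape(5, 2)[:, 0].astype(bool)
    pts[~visible] = np.nan
    return pts

ct = np.array([30, 40], dtype=np.float32)
reg = np.zeros(10, dtype=np.float32); reg[:2] = [1.5, -2.0]
mask = np.zeros(10, dtype=np.uint8); mask[:2] = 1
print(decode_landmarks(reg, ct, mask)[0])   # -> [126. 152.]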
Example #18
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        anns = list(
            filter(
                lambda x: x['category_id'] in self._valid_ids and x['iscrowd']
                != 1, anns))
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if self.cfg.DATASET.RANDOM_CROP:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.cfg.DATASET.SCALE
                cf = self.cfg.DATASET.SHIFT
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.cfg.DATASET.AUG_ROT:
                rf = self.cfg.DATASET.ROTATE
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.cfg.DATASET.FLIP:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES])
        inp = cv2.warpAffine(
            img,
            trans_input, (self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - np.array(self.cfg.DATASET.MEAN).astype(
            np.float32)) / np.array(self.cfg.DATASET.STD).astype(np.float32)
        inp = inp.transpose(2, 0, 1)

        output_res = self.cfg.MODEL.OUTPUT_RES
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        trans_seg_output = get_affine_transform(c, s, 0,
                                                [output_res, output_res])
        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        seg = np.zeros((self.max_objs, output_res, output_res),
                       dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            segment = self.coco.annToMask(ann)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
                segment = segment[:, ::-1]

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            segment = cv2.warpAffine(segment,
                                     trans_seg_output,
                                     (output_res, output_res),
                                     flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                #mask
                pad_rate = 0.3
                segment_mask = np.ones_like(segment)
                x = (np.clip([ct[0] - (1 + pad_rate) * w / 2, ct[0] + (1 + pad_rate) * w / 2], 0, output_res - 1) * 2).astype(np.int32)
                y = (np.clip([ct[1] - (1 + pad_rate) * h / 2, ct[1] + (1 + pad_rate) * h / 2], 0, output_res - 1) * 2).astype(np.int32)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment_mask == 1] = 255
                seg[k] = segment

                #keypoint
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.cfg.hm_gauss \
                            if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.cfg.LOSS.DENSE_HP:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask,
            'seg': seg
        }
        if self.cfg.LOSS.DENSE_HP:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.cfg.LOSS.REG_OFFSET:
            ret.update({'reg': reg})
        if self.cfg.LOSS.HM_HP:
            ret.update({'hm_hp': hm_hp})
        if self.cfg.LOSS.REG_HP_OFFSET:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.cfg.DEBUG > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
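Every example in this listing calls draw_umich_gaussian(heatmap, center, radius) to splat a soft target at each object center. For reference, a self-contained version equivalent to the widely used CenterNet helper of the same name is sketched below (the variant imported by these loaders may differ in details); it writes the element-wise maximum of a truncated 2D Gaussian of the given radius onto the heatmap.

import numpy as np

def gaussian2D(shape, sigma=1):
    """Truncated 2D Gaussian kernel of the given (odd) shape."""
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    """Splat a Gaussian of the given radius at center=(x, y) via element-wise max."""
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    # clip the kernel so it stays inside the heatmap near the borders
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap

hm = np.zeros((128, 128), dtype=np.float32)
draw_umich_gaussian(hm, (64, 40), radius=6)
print(hm.max(), hm[40, 64])   # both 1.0 at the center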
Example #19
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        # all anns of one img
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        # height, width
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)  # ori img center

        if self.opt.keep_res:  # False
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            # not keep_res, use opt.input_h, w
            # note: h != w, ori not keep_res, then set w=h=512
            # s = max(img.shape[0], img.shape[1]) * 1.0
            s = np.array([width, height], dtype=np.float32)  # ori img size?
            input_h, input_w = self.opt.input_h, self.opt.input_w

        # flip
        flipped = False

        # get scale and center to do affine transform
        if self.split == 'train':
            # random scale
            if not self.opt.not_rand_crop:
                # train set opt.not_rand_crop=False, so will use default random scale
                # s = s * np.random.choice(np.arange(0.4, 0.6, 0.1))  # (1920,1080) -> (640)
                # note: restrict the img center translate range, lrtb 1/2
                # w_border = self._get_border(img.shape[1] // 4, img.shape[1])
                # h_border = self._get_border(img.shape[0] // 4, img.shape[0])
                # random center, this may translate img so far
                w_range, h_range = img.shape[1] // 8, img.shape[0] // 8
                c[0] = np.random.randint(low=img.shape[1] // 2 - w_range,
                                         high=img.shape[1] // 2 + w_range)
                c[1] = np.random.randint(low=img.shape[0] // 2 - h_range,
                                         high=img.shape[0] // 2 + h_range)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # random flip
            if np.random.random() < self.opt.flip:  # 0.5
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # trans ori img to input size
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # use generated trans_input matrix to trans img
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        # note: see trans img
        # print('scale:', s, 'center:', c)
        # cv2.imwrite('{}_img_trans.png'.format(img_id), inp)
        inp = (inp.astype(np.float32) / 255.)

        # color augment
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        # normalize
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        # down sample
        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes

        # trans ori img box to output size
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        # draw gaussian core on heatmap
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)  # 20
        # dense or sparse wh regress
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # (10,2) sparse!
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)  # dense!
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)  # (10,2)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        # msra, umich
        # opt.mse_loss = False
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        # GT
        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])  # xywh -> x1y1x2y2; shape (4,)
            segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))  # x,y
            # map ori cat_id (whatever) to [0, num_class-1]
            cls_id = int(self.cat_ids[ann['category_id']])  # self.cat_ids in cigar.py
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1  # [0,2],
                segmentation[:, 0] = width - segmentation[:, 0] - 1  # flip x

            # transform box 2 pts to output
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # x1y1x2y2

            # transform segmentation, just trans polygon_center is enough
            polygon_center = self._get_polygon_center(segmentation)
            polygon_center = affine_transform(polygon_center, trans_output)
            print(polygon_center)

            if h > 0 and w > 0:
                # note: radius generated with spatial extent info from h,w
                radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
                radius = max(0, int(math.ceil(radius / 3)))
                # radius = max(0, int(radius))
                # opt.mse_loss = False
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                # box center
                box_center = np.array([(bbox[0] + bbox[2]) / 2,
                                       (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                print(box_center)
                # note: change ct to polygon center
                ct = polygon_center
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # label of w,h
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]  # 1D ind of ct position
                # note: update offset
                reg[k] = box_center - ct_int  # float_box_center - int_polygon_center
                print('offset:', reg[k])
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

                # use box_center to compute box
                ct = box_center.astype(np.int32)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        # from utils.plt_utils import plt_heatmaps
        # note: see heatmaps
        # plt_heatmaps(hm, basename='{}_hm'.format(img_id))
        # print(wh)

        if self.opt.dense_wh:  # False
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
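The loop above relies on self._get_polygon_center, which is not shown in this listing. Purely as an illustration, a minimal hypothetical stand-in that returns the mean of the polygon vertices (which matches how the result is consumed before affine_transform) is sketched below; the real helper may compute the center differently.

import numpy as np

def get_polygon_center(segmentation):
    """Hypothetical stand-in for self._get_polygon_center, NOT the repository's code.

    segmentation : (N, 2) array of polygon vertices (x, y).
    Returns the mean of the vertices as a float32 (x, y) pair.
    """
    seg = np.asarray(segmentation, dtype=np.float32).reshape(-1, 2)
    return seg.mean(axis=0)

poly = np.array([[0, 0], [4, 0], [4, 2], [0, 2]], dtype=np.float32)
print(get_polygon_center(poly))   # -> [2. 1.]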
Example #20
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        # Get the image
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            # Random crop by default
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            # Otherwise scale and shift image
            else:
                sf = self.opt.scale
                cf = self.opt.shift

                # Scale image
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)

                # Shift image
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # Flip image
            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        if self.opt.rotate > 0:  # rotate the image
            if self.opt.rotate == 90:
                img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
            if self.opt.rotate == 180:
                img = cv2.rotate(img, cv2.ROTATE_180)
            if self.opt.rotate == 270:
                img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)

        # Perform affine transformation
        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])

        # Warp affine
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        # Scale RGB pixels
        inp = (inp.astype(np.float32) / 255.)

        # Add color augmentation
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        # Add for circle
        cl = np.zeros((self.max_objs, 1), dtype=np.float32)
        dense_cl = np.zeros((1, output_h, output_w), dtype=np.float32)
        reg_cl = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind_cl = np.zeros((self.max_objs), dtype=np.int64)
        cat_spec_cl = np.zeros((self.max_objs, num_classes * 1), dtype=np.float32)
        cat_spec_clmask = np.zeros((self.max_objs, num_classes * 1), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        # For each object in the annotation
        for k in range(num_objs):
            # Get the annotation
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])

            # Debug print statements
            # print(self.cat_ids)
            # print(ann['category_id'])
            # print(int(self.cat_ids[int(ann['category_id'])]))

            cls_id = int(self.cat_ids[int(ann['category_id'])])

            center_point = ann['circle_center']
            center_radius = ann['circle_radius']

            # If the image was flipped, then flip the annotation
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                center_point[0] = width - center_point[0]

            # If the image was affine transformed, then transform the annotation
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            center_point_aff = affine_transform(center_point, trans_output)
            center_radius_aff = center_radius * trans_output[0][0]
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0 and center_point_aff[0]>0 \
                    and center_point_aff[1]>0 and center_point_aff[0]<output_w\
                    and center_point_aff[1]<output_h:

                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                #
                ct_int = ct.astype(np.int32)
                # # draw_gaussian(hm[cls_id], ct_int, radius)
                # wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                # reg[k] = ct - ct_int

                # cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                # cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                # if self.opt.dense_wh:
                #     draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                # gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                #                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
                if self.opt.ez_guassian_radius:
                    radius = center_radius_aff
                else:
                    radius = gaussian_radius((math.ceil(center_radius_aff*2), math.ceil(center_radius_aff*2)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                cp = center_point_aff
                cp_int = cp.astype(np.int32)
                draw_gaussian(hm[cls_id], cp_int, radius)
                ind_cl[k] = cp_int[1] * output_w + cp_int[0]
                reg_cl[k] = cp - cp_int
                reg_mask[k] = 1
                cr = center_radius_aff
                cl[k] = 1. * cr
                cat_spec_cl[k, cls_id * 1: cls_id * 1 + 1] = cl[k]
                cat_spec_clmask[k, cls_id * 1: cls_id * 1 + 1] = 1
                if self.opt.filter_boarder:
                    if cp[0] - cr < 0 or cp[0] + cr > output_w:
                        continue
                    if cp[1] - cr < 0 or cp[1] + cr > output_h:
                        continue
                gt_det.append([cp[0], cp[1], cr, 1, cls_id])

                # if ind_cl[0]<0:
                #     aaa = 1
                #
                # print('ind')
                # print(ind[0:10])
                # print('ind_cl')
                # print(ind_cl[0:10])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind_cl, 'cl': cl}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            ret.pop('wh', None)  # ret has no 'wh' key for the circle head
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            ret.pop('wh', None)  # ret has no 'wh' key for the circle head
        if self.opt.reg_offset:
            ret.update({'reg': reg_cl})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 5), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
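At train time this circle variant stores the sub-pixel offset (reg_cl) and radius (cl) at the flat index ind_cl of the circle's heatmap peak. A hedged sketch of the matching single-peak decode step follows, assuming the network emits dense maps hm (num_classes, H, W), reg (2, H, W) and cl (1, H, W); the map names, the down_ratio scaling of the radius, and the greedy single-detection decoding are illustrative, not this repository's test-time code.

import numpy as np

def decode_top_circle(hm, reg, cl, down_ratio=4):
    """Pick the strongest heatmap peak and read the circle stored there (sketch)."""
    num_classes, H, W = hm.shape
    flat_idx = int(hm.reshape(num_classes, -1).max(axis=0).argmax())
    cy, cx = divmod(flat_idx, W)                 # integer peak location (y, x)
    score = float(hm[:, cy, cx].max())
    cls_id = int(hm[:, cy, cx].argmax())
    dx, dy = reg[:, cy, cx]                      # sub-pixel offset at the peak
    center = (float((cx + dx) * down_ratio), float((cy + dy) * down_ratio))
    # approximate: the exact inverse would go through the stored affine transform
    radius = float(cl[0, cy, cx]) * down_ratio
    return {'center': center, 'radius': radius, 'score': score, 'class': cls_id}

hm = np.zeros((1, 128, 128), np.float32); hm[0, 40, 17] = 1.0
reg = np.zeros((2, 128, 128), np.float32); reg[:, 40, 17] = [0.3, 0.7]
cl = np.full((1, 128, 128), 5.0, np.float32)
print(decode_top_circle(hm, reg, cl))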
Example #21
    def __getitem__(self, index):
        img_id = self.images[index]
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
        s = max(img.shape[0], img.shape[1]) * 1.0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                c[0] += img.shape[1] * np.clip(np.random.randn() * cf, -2 * cf,
                                               2 * cf)
                c[1] += img.shape[0] * np.clip(np.random.randn() * cf, -2 * cf,
                                               2 * cf)
            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]

        trans_input = get_affine_transform(
            c, s, 0, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_classes = self.opt.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        num_hm = 1 if self.opt.agnostic_ex else num_classes

        hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
        hm_c = np.zeros((num_classes, output_res, output_res),
                        dtype=np.float32)
        reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind_t = np.zeros((self.max_objs), dtype=np.int64)
        ind_l = np.zeros((self.max_objs), dtype=np.int64)
        ind_b = np.zeros((self.max_objs), dtype=np.int64)
        ind_r = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        for k in range(num_objs):
            ann = anns[k]
            # bbox = self._coco_box_to_bbox(ann['bbox'])
            # tlbr
            pts = np.array(ann['extreme_points'],
                           dtype=np.float32).reshape(4, 2)
            # cls_id = int(self.cat_ids[ann['category_id']] - 1) # bug
            cls_id = int(self.cat_ids[ann['category_id']])
            hm_id = 0 if self.opt.agnostic_ex else cls_id
            if flipped:
                pts[:, 0] = width - pts[:, 0] - 1
                pts[1], pts[3] = pts[3].copy(), pts[1].copy()
            for j in range(4):
                pts[j] = affine_transform(pts[j], trans_output)
            pts = np.clip(pts, 0, self.opt.output_res - 1)
            h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                pt_int = pts.astype(np.int32)
                draw_gaussian(hm_t[hm_id], pt_int[0], radius)
                draw_gaussian(hm_l[hm_id], pt_int[1], radius)
                draw_gaussian(hm_b[hm_id], pt_int[2], radius)
                draw_gaussian(hm_r[hm_id], pt_int[3], radius)
                reg_t[k] = pts[0] - pt_int[0]
                reg_l[k] = pts[1] - pt_int[1]
                reg_b[k] = pts[2] - pt_int[2]
                reg_r[k] = pts[3] - pt_int[3]
                ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
                ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
                ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
                ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]

                ct = [
                    int((pts[3, 0] + pts[1, 0]) / 2),
                    int((pts[0, 1] + pts[2, 1]) / 2)
                ]
                draw_gaussian(hm_c[cls_id], ct, radius)
                reg_mask[k] = 1
        ret = {
            'input': inp,
            'hm_t': hm_t,
            'hm_l': hm_l,
            'hm_b': hm_b,
            'hm_r': hm_r,
            'hm_c': hm_c
        }
        if self.opt.reg_offset:
            ret.update({
                'reg_mask': reg_mask,
                'reg_t': reg_t,
                'reg_l': reg_l,
                'reg_b': reg_b,
                'reg_r': reg_r,
                'ind_t': ind_t,
                'ind_l': ind_l,
                'ind_b': ind_b,
                'ind_r': ind_r
            })

        return ret
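This ExtremeNet-style loader produces four extreme-point heatmaps (top, left, bottom, right) plus a center heatmap hm_c. As a rough illustration only (the actual decoder groups extreme points by checking the center heatmap and handles multiple objects), a box can be composed from one peak of each map as sketched below.

import numpy as np

def peak_xy(heatmap):
    """Return (x, y) of the strongest response in a single 2D heatmap."""
    y, x = np.unravel_index(int(heatmap.argmax()), heatmap.shape)
    return x, y

def box_from_extremes(hm_t, hm_l, hm_b, hm_r, cls_id=0):
    """Compose an axis-aligned box from one top/left/bottom/right peak (sketch)."""
    _, ty = peak_xy(hm_t[cls_id])
    lx, _ = peak_xy(hm_l[cls_id])
    _, by = peak_xy(hm_b[cls_id])
    rx, _ = peak_xy(hm_r[cls_id])
    return [lx, ty, rx, by]      # x1, y1, x2, y2 at the output resolution

# four 1-class 128x128 maps with a single synthetic object
maps = [np.zeros((1, 128, 128), np.float32) for _ in range(4)]
for m, (x, y) in zip(maps, [(50, 20), (30, 45), (52, 70), (75, 44)]):
    m[0, y, x] = 1.0
print(box_from_extremes(*maps))  # -> [30, 20, 75, 70]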
Example #22
    def __getitem__(self, index):
        # Entry point: this returns the outputs inp, hm, reg_mask, ind, wh.
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)  # number of objects, capped at 100 here

        img = cv2.imread(img_path)
        # Next, get the longest image side and the input size (512, 512)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                     dtype=np.float32)  # image center
        if self.opt.keep_res:  # False
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:  # True
            s = max(img.shape[0], img.shape[1]) * 1.0  # s is the longest side
            input_h, input_w = self.opt.input_h, self.opt.input_w  # 512, 512

        # A series of preprocessing steps; the result is the first required output, the input image inp.
        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s  # * np.random.choice(np.arange(0.6, 1.4, 0.1))  # random scale (disabled here)
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1  # mirror the center for the random flip

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)  # affine warp
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        # Next, generate the heatmaps.
        output_h = input_h // self.opt.down_ratio  # output 512 // 4 = 128
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w),
                      dtype=np.float32)  # heatmap (80, 128, 128)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # width/height per center (100 x 2)
        angs = np.zeros((self.max_objs, 1), dtype=np.float32)  # angle (100 x 1)
        dense_wh = np.zeros((2, output_h, output_w),
                            dtype=np.float32)  # 2 x 128 x 128
        reg = np.zeros((self.max_objs, 2),
                       dtype=np.float32)  # sub-pixel error from downsampling, 100 x 2 floats
        ind = np.zeros((self.max_objs), dtype=np.int64)  # 100 flat indices
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)  # regression mask, one flag per object slot
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)  # 100 x 80 x 2
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)  # 100 x 80 x 2
        # mse_loss is False here, so only draw_umich_gaussian matters
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                bbox[[4]] = 180 - bbox[[4]]
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:4] = affine_transform(bbox[2:4], trans_output)

            # This is the key spot that caused the center-point shift in ship detection

            #bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            #bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            #TODO insert
            ang = bbox[4]
            h = np.clip(h, 0, output_h - 1)
            w = np.clip(w, 0, output_w - 1)
            if h > 0 and w > 0:
                radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)))  # the key is how to pick the Gaussian radius
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                if ct[0] < 0 or ct[0] > output_w - 1 or \
                        ct[1] < 0 or ct[1] > output_h - 1:
                    continue
                # ct[0] = np.clip(ct[0], 0, output_w - 1)
                # ct[1] = np.clip(ct[1], 0, output_h - 1)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                #cv2.imwrite("/data/humaocheng/CenterNet-master/single_heatmap.jpg", hm[0]*255)
                wh[k] = 1. * w, 1. * h  # box width/height for the size regression loss
                angs[k] = 1. * ang
                ind[k] = ct_int[1] * output_w + ct_int[0]  # flattened index of the center on the output feature map
                reg[k] = ct - ct_int  # offset target: the sub-pixel error of the k-th center
                # e.g. [98.97667, 2.3566666] - [98, 2] = [0.97667, 0.3566666]
                reg_mask[k] = 1  # mark this slot as holding a real object;
                # used slots are 1, the remaining ones stay 0.
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                # gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                #                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
                #TODO insert
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    ang, 1, cls_id
                ])
        # cv2.imwrite("/data/humaocheng/CenterNet-master/heatmap.jpg",hm[0]*255)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'ang': angs
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 7), dtype=np.float32)  # 7 values per row: x1, y1, x2, y2, ang, score, cls
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
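
# For reference, a minimal sketch of the Gaussian splatting that draw_umich_gaussian
# performs (this assumes the stock CenterNet implementation; the repo's own version may
# add the angle handling used above). The peak is merged with np.maximum so overlapping
# objects keep the stronger response.
import numpy as np

def gaussian2D(shape, sigma=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian_sketch(heatmap, center, radius, k=1):
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap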
Example #23
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                #TODO: this is where multi-scale training can be changed.
                s = s  # * np.random.choice(np.arange(0.8, 1.5, 0.1))  # changed from (0.6, 1.4); random scaling disabled here
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:  # rotation augmentation
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # The block below estimates the dominant rotation angle of the objects
        # (it uses angle_list, hm, hm_hp, inp and output_res, which are defined later in this function).
        if self.opt.angle_norm and self.split == 'train':
            angle_list = np.array(angle_list) % np.pi  # first fold all angles into [0, pi)
            angle_int = (angle_list // (np.pi / 9)).astype('int')
            angle_b = np.bincount(angle_int)
            index_rot = np.argmax(angle_b)
            ind_rot = (angle_list >
                       (index_rot) * np.pi / 9) * (angle_list <=
                                                   (index_rot + 1) * np.pi / 9)
            angle_rot = np.average(angle_list[ind_rot])
            # rotate the image, the center heatmap and the keypoint heatmap by the estimated angle
            angle_img_rot = angle_rot * (-180) / np.pi
            hm_rotate = hm.transpose(1, 2, 0)
            M = cv2.getRotationMatrix2D(
                ((output_res) / 2.0, (output_res) / 2.0), angle_img_rot, 1)
            hm_rotate = cv2.warpAffine(hm_rotate, M, (output_res, output_res))
            hm = hm_rotate.transpose(2, 0, 1)
            hp_rotate = hm_hp.transpose(1, 2, 0)
            hp_rotate = cv2.warpAffine(hp_rotate, M, (output_res, output_res))
            hm_hp = hp_rotate[np.newaxis, :]
            M = cv2.getRotationMatrix2D(
                ((self.opt.input_res) / 2.0, (self.opt.input_res) / 2.0),
                angle_img_rot, 1)
            inp = inp.transpose(1, 2, 0)
            inp = cv2.warpAffine(inp, M,
                                 (self.opt.input_res, self.opt.input_res))
            inp = inp.transpose(2, 0, 1)
            # inp1=cv2.warpAffine(inp1,M,(self.opt.input_res,self.opt.input_res))
            # end of the angle-normalization block

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        # inp1 = cv2.warpAffine(img, trans_input,
        #                      (self.opt.input_res, self.opt.input_res),
        #                      flags=cv2.INTER_LINEAR)
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        angle_list = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            #TODO: revisit this change to the category id mapping
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'][0:3],
                           np.float32).reshape(num_joints, 3)  # first keypoint only: (x, y, vis)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                #for e in self.flip_idx:
                #pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            #bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            h = np.clip(h, 0, output_res - 1)
            w = np.clip(w, 0, output_res - 1)
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w))) * 1.2
                sqrt_wh = np.sqrt(np.sqrt(h * w))
                radius_w = radius * np.sqrt(w) / sqrt_wh
                radius_h = radius * np.sqrt(h) / sqrt_wh
                radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, np.ceil(radius_w))
                radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, np.ceil(radius_h))
                # radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)

                ct[0] = np.clip(ct[0], 0, output_res - 1)
                ct[1] = np.clip(ct[1], 0, output_res - 1)

                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                #TODO change
                angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
                angle_list.append(angle)
                draw_gaussian(hm[cls_id], ct_int, [radius_w, radius_h, angle])
                # draw_gaussian(hm[cls_id], ct_int, radiusw,radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }

        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6 + num_joints * 2), dtype=np.float32)  # box (4) + score + keypoints + cls
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta


        # Debug helper: visualise the generated feature maps.
        # debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
        #                     theme=self.opt.debugger_theme)
        # self.debug(debugger, inp1, ret)
        return ret
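
# A minimal sketch of the dominant-angle estimate used in the angle_norm branch above:
# angles are folded into [0, pi), split into 9 bins of pi/9, and the mean angle of the
# most populated bin is returned as the rotation to undo.
import numpy as np

def dominant_angle(angle_list):
    angles = np.asarray(angle_list) % np.pi                 # fold into [0, pi)
    bins = (angles // (np.pi / 9)).astype(int)              # 9 bins of 20 degrees
    major = np.argmax(np.bincount(bins))                    # most frequent bin
    in_bin = (angles > major * np.pi / 9) & (angles <= (major + 1) * np.pi / 9)
    return np.average(angles[in_bin])                       # mean angle of that bin

# e.g. dominant_angle([0.10, 0.12, 1.50]) -> roughly 0.11 rad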
Example #24
  def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

      if self.split == 'train':
          input_w = self.patch_sizes[(self.getcount//self.opt.batch_size) % len(self.patch_sizes)]
          input_h = input_w

          self.getcount = 0 if self.getcount == self.num_samples else self.getcount + 1

    flipped = False

    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))

        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])

        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift

        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)

        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1


    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)

    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes

    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []

    allmask = np.zeros((output_h, output_w, self.opt.num_maskclasses+levelnum), dtype=np.uint8)

    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])

      if ann['category_id'] not in self._valid_ids:
        continue

      cls_id = int(self.cat_ids[ann['category_id']])

      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1

      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

      x1 = int(bbox[0])
      y1 = int(bbox[1])
      x2 = int(bbox[2])
      y2 = int(bbox[3])

      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

      if h > 0 and w > 0:
        ### gen mask begin ###
        # clsbase = cls_id*9
        clsbase = 0*9
        mask = self.coco.annToMask(ann)

        if flipped:
          mask = mask[:, ::-1]

        mask = cv2.warpAffine(mask, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR)

        roi = mask[y1:y2, x1:x2]
        roi_h, roi_w = roi.shape

        if roi_h < 6 or roi_w < 6:
          continue

        l = size2level(output_w*output_h, roi_w*roi_h)
        allmask[:,:,self.opt.num_maskclasses+l] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l], mask)
        allmask[:,:,self.opt.num_maskclasses+l+1] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l+1], mask)

        roi_cx = roi_w//2
        roi_cy = roi_h//2
        cell_w = (roi_w+5)//6
        cell_h = (roi_h+5)//6

        allmaskroi = allmask[y1:y2, x1:x2, :]

        ww = max(6,cell_w//4)
        hh = max(6,cell_h//4)

        # TOP
        self.assignroi(0, allmaskroi, roi, 0,                0,                roi_cx-cell_w+ww, roi_cy-cell_h+hh)
        self.assignroi(1, allmaskroi, roi, roi_cx-cell_w-ww, 0,                roi_cx+cell_w+ww, roi_cy-cell_h+hh)
        self.assignroi(2, allmaskroi, roi, roi_cx+cell_w-ww, 0,                roi_w,            roi_cy-cell_h+hh)

        # MIDDLE
        self.assignroi(3, allmaskroi, roi, 0,                roi_cy-cell_h-hh, roi_cx-cell_w+ww, roi_cy+cell_h+hh)
        self.assignroi(4, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy-cell_h-hh, roi_cx+cell_w+ww, roi_cy+cell_h+hh)
        self.assignroi(5, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy-cell_h-hh, roi_w,            roi_cy+cell_h+hh)

        # BOTTOM
        self.assignroi(6, allmaskroi, roi, 0,                roi_cy+cell_h-hh, roi_cx-cell_w+ww, roi_h           )
        self.assignroi(7, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy+cell_h-hh, roi_cx+cell_w+ww, roi_h           )
        self.assignroi(8, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy+cell_h-hh, roi_w,            roi_h           )
        ### gen mask end ###

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))

        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)

        if self.opt.mse_loss:
          radius = self.opt.hm_gauss
          draw_gaussian(hm[cls_id], ct_int, radius)
        else:
          #draw_gaussian(hm[cls_id], ct_int, radius)
          xradius = int(gaussian_radius((math.ceil(w),math.ceil(w))))
          yradius = int(gaussian_radius((math.ceil(h),math.ceil(h))))
          draw_elipse_gaussian(hm[cls_id], ct_int, (xradius,yradius))

        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1

        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1

        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

        gt_det.append([ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    #cv2.imwrite("./results/hehe.jpg", (hm.max(axis=0).squeeze()*255).astype(np.uint8))

    if index % 30 == 0:
      cv2.imwrite("./results/top.jpg", (allmask[:,:,0:3]*255).astype(np.uint8))
      cv2.imwrite("./results/middle.jpg", (allmask[:,:,3:6]*255).astype(np.uint8))
      cv2.imwrite("./results/bottom.jpg", (allmask[:,:,6:9]*255).astype(np.uint8))
      cv2.imwrite("./results/full.jpg", (((allmask[:,:,0:3]+allmask[:,:,3:6]+allmask[:,:,6:9]) > 0)*255).astype(np.uint8))
      cv2.imwrite("./results/large.jpg", (((allmask[:,:,9:12]) > 0)*255).astype(np.uint8))
      cv2.imwrite("./results/small.jpg", (((allmask[:,:,12:15]) > 0)*255).astype(np.uint8))

    ret = {
      'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
      'allmask': allmask.astype(np.float32).transpose(2, 0, 1)
    }

    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']

    if self.opt.reg_offset:
      ret.update({'reg': reg})

    #if self.opt.debug > 0 or not self.split == 'train':
    if not self.split == 'train':
      if len(gt_det) > 0:
        gt_det = np.array(gt_det, dtype=np.float32)
      else:
        gt_det = np.zeros((1, 6), dtype=np.float32)

      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta

    # img = cv2.warpAffine(img, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR)
    # img = img*allmask[:,:,:3]
    # cv2.imwrite("./results/maskit.jpg", img)

    return ret
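
# assignroi is not shown in this example; a plausible minimal sketch (hypothetical name
# and semantics) would OR the given sub-window of the object mask into channel `ch` of
# the per-pixel target, matching how the 3x3 grid calls above are laid out.
import numpy as np

def assignroi_sketch(ch, allmaskroi, roi, x1, y1, x2, y2):
    x1, y1 = max(0, int(x1)), max(0, int(y1))
    x2, y2 = min(roi.shape[1], int(x2)), min(roi.shape[0], int(y2))
    if x2 <= x1 or y2 <= y1:
        return
    allmaskroi[y1:y2, x1:x2, ch] = np.bitwise_or(allmaskroi[y1:y2, x1:x2, ch],
                                                 roi[y1:y2, x1:x2])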
Example #25
    def __getitem__(self, index):
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        img = self.coco.loadImgs(ids=[img_id])[0]
        w_img = int(img['width'])
        h_img = int(img['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = get_connected_polygon_using_mask(
                anno['segmentation'], (h_img, w_img),
                n_vertices=self.n_vertices,
                closing_max_kernel=50)

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if len(contour) > self.n_vertices:
                fixed_contour = resample(contour, num=self.n_vertices)
            else:
                fixed_contour = turning_angle_resample(contour,
                                                       self.n_vertices)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            if contour_std < 1e-6 or np.isinf(contour_std) or np.isnan(contour_std):  # invalid shapes
                continue

            updated_bbox = [
                np.min(fixed_contour[:, 0]),
                np.min(fixed_contour[:, 1]),
                np.max(fixed_contour[:, 0]),
                np.max(fixed_contour[:, 1])
            ]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        # bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(160, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        # -----------------------------------debug---------------------------------
        # image_show = img.copy()
        # for bbox, label in zip(bboxes, labels):
        #   if flipped:
        #     bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        #   bbox[:2] = affine_transform(bbox[:2], trans_img)
        #   bbox[2:] = affine_transform(bbox[2:], trans_img)
        #   bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.img_size['w'] - 1)
        #   bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.img_size['h'] - 1)
        #   cv2.rectangle(image_show, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        #   cv2.putText(image_show, self.class_name[label + 1], (int(bbox[0]), int(bbox[1])),
        #               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        # cv2.imshow('img', image_show)
        # cv2.waitKey()
        # -----------------------------------debug---------------------------------

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # gt mass centers to bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        contour_std_ = np.zeros(
            (self.max_objs, 1),
            dtype=np.float32)  # per-object contour std (scale of the normalised shape)
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # generate gt shape mean and std from contours
            for m in range(self.n_vertices):  # apply scale and crop transform to shapes
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))

            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0,
                                          self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0,
                                          self.fmap_size['h'] - 1)

            clockwise_flag = check_clockwise_polygon(shape_clipped)
            if not clockwise_flag:
                fixed_contour = np.flip(shape_clipped, axis=0)
            else:
                fixed_contour = shape_clipped.copy()
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            contour_std = np.std(indexed_shape, axis=0) + 1e-4
            if h < 1e-6 or w < 1e-6:  # remove small bboxes
                continue

            # centered_shape = indexed_shape - mass_center
            norm_shape = (indexed_shape - mass_center) / np.sqrt(
                np.sum(contour_std**2))

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                # obj_c = mass_center
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                shapes_[k] = norm_shape.reshape((1, -1))
                center_offsets[k] = mass_center - obj_c
                codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                         self.dictionary,
                                         lmbda=self.sparse_alpha,
                                         max_iter=60)
                contour_std_[k] = np.sqrt(np.sum(contour_std**2))

                w_h_[k] = 1. * w, 1. * h
                # w_h_[k] = mass_center[1] - bbox[1], bbox[3] - mass_center[1], \
                #           mass_center[0] - bbox[0], bbox[2] - mass_center[0]  # [top, bottom, left, right] distance
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'std': contour_std_,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
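
# A minimal sketch of the contour canonicalisation performed above: enforce clockwise
# order, re-index from the left-most vertex, then normalise by the mass center and the
# overall std (check_clockwise_polygon is the same helper used in the example).
import numpy as np

def canonicalize_contour(shape_pts):
    pts = shape_pts if check_clockwise_polygon(shape_pts) else np.flip(shape_pts, axis=0)
    idx = np.argmin(pts[:, 0])                               # start at the left-most vertex
    pts = np.concatenate((pts[idx:, :], pts[:idx, :]), axis=0)
    mass_center = np.mean(pts, axis=0)
    contour_std = np.std(pts, axis=0) + 1e-4
    scale = np.sqrt(np.sum(contour_std ** 2))
    norm_shape = (pts - mass_center) / scale                 # this is what fast_ista encodes
    return norm_shape, mass_center, scale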
Example #26
    def __getitem__(self, index):
        #img_id = self.images[index]
        #file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        #img_path = os.path.join(self.img_dir, file_name)
        #ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        #anns = self.coco.loadAnns(ids=ann_ids)
        #num_objs = min(len(anns), self.max_objs)
        img_id = index
        img_path = self.images[index]
        label_path = self.label_files[index]
        #print(self.img_dir)
        #print(file_name)
        img = cv2.imread(img_path)
        h, w, _ = img.shape
        labels = []
        #print(img_path)
        #print(label_path)
        #print(os.path.isfile(label_path))

        if os.path.isfile(label_path):
            # with open(label_path, 'r') as f:
            #     x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            x = self.labels[index]
            #print(x)
            if x.size > 0:
                # Normalized cx, cy, w, h to pixel x1, y1, w, h (COCO-style xywh)
                labels = x.copy()
                labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2)
                labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2)
                labels[:, 3] = w * (x[:, 3])
                labels[:, 4] = h * (x[:, 4])
                #labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2)
                #labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2)
        #print('labels:{}'.format(len(labels)))

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp_ori = cv2.warpAffine(img,
                                 trans_input, (input_w, input_h),
                                 flags=cv2.INTER_LINEAR)

        inp = (inp_ori.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        #for k in range(num_objs):
        for k in range(len(labels)):
            ann = labels[k]
            #print('ann:{}'.format(ann))
            bbox = self._coco_box_to_bbox(ann[1:5])
            #print(index,bbox)
            #cv2.rectangle(img,(int(bbox[0]), int(bbox[1])),(int(bbox[2]), int(bbox[3])),(0,0,255),3)
            #print('bbox: ',bbox)
            cls_id = int(ann[0])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            #print('refined_bbox: ',bbox)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
                #print('refined_bbox: ',[ct[0] - w / 2, ct[1] - h / 2,
                #               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
                #cv2.rectangle(inp_ori,(int(ct[0] - w / 2)*self.opt.down_ratio, int(ct[1] - h / 2)*self.opt.down_ratio),(int(ct[0] + w / 2)*self.opt.down_ratio, int(ct[1] + h / 2)*self.opt.down_ratio),(0,0,255),3)
        #cv2.imshow('img',img)
        #cv2.imshow('img_ori',inp_ori)
        #cv2.waitKey(0)
        #cv2.destroyAllWindows()

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
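
# A minimal sketch of the label conversion done above: each YOLO-format row
# [cls, cx, cy, bw, bh] (all normalised to [0, 1]) becomes a pixel-space COCO-style box
# [cls, x1, y1, bw, bh], which _coco_box_to_bbox then turns into [x1, y1, x2, y2].
import numpy as np

def yolo_to_coco(x, img_w, img_h):
    labels = x.astype(np.float32).copy()
    labels[:, 1] = img_w * (x[:, 1] - x[:, 3] / 2)   # top-left x in pixels
    labels[:, 2] = img_h * (x[:, 2] - x[:, 4] / 2)   # top-left y in pixels
    labels[:, 3] = img_w * x[:, 3]                   # box width in pixels
    labels[:, 4] = img_h * x[:, 4]                   # box height in pixels
    return labels

# e.g. yolo_to_coco(np.array([[0, 0.5, 0.5, 0.25, 0.5]]), 640, 480)
# -> [[0., 240., 120., 160., 240.]]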
Example #27
  def __getitem__(self, index):
    img_id = self.images[index]
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name'])
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    annotations = self.coco.loadAnns(ids=ann_ids)

    labels = np.array([self.cat_ids[anno['category_id']] for anno in annotations])
    bboxes = np.array([anno['bbox'] for anno in annotations], dtype=np.float32)

    if len(bboxes) == 0:
      bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
      labels = np.array([[0]])

    bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

    # print("===============", img_path)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    # image center and global scale for the affine transform
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
    scale = max(height, width) * 1.0  # scale of the affine transform

    flipped = False
    if self.split == 'train':
      scale = scale * np.random.choice(self.rand_scales)
      w_border = get_border(128, width)
      h_border = get_border(128, height)
      center[0] = np.random.randint(low=w_border, high=width - w_border)
      center[1] = np.random.randint(low=h_border, high=height - h_border)

      if np.random.random() < 0.5:
        flipped = True
        img = img[:, ::-1, :]
        center[0] = width - center[0] - 1

    # apply the affine transform to the image
    trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

    img = (img.astype(np.float32) / 255.)
    if self.split == 'train':
      color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

    img -= self.mean
    img /= self.std
    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

    # the three key regression targets
    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression

    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    for k, (bbox, label) in enumerate(zip(bboxes, labels)):
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_fmap)
      bbox[2:] = affine_transform(bbox[2:], trans_fmap)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

      if h > 0 and w > 0:
        obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        obj_c_int = obj_c.astype(np.int32)
        # Gaussian radius derived from the box size
        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
        # splat a Gaussian peak onto the class heatmap
        draw_umich_gaussian(hmap[label], obj_c_int, radius)

        w_h_[k] = 1. * w, 1. * h
        # record the sub-pixel offset caused by downsampling
        regs[k] = obj_c - obj_c_int  # discretization error
        # flattened index of the k-th object's center: fmap_w * cy + cx
        inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
        # mark this slot as holding a real object
        ind_masks[k] = 1

    return {'image': img,
            'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
            'c': center, 's': scale, 'img_id': img_id}
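
# For reference, a minimal sketch showing how the flattened `inds` and the `regs`
# offsets built above recover the sub-pixel center on the feature map
# (fmap_w is the feature-map width):
import numpy as np

def decode_center(ind, reg, fmap_w):
    cx_int, cy_int = ind % fmap_w, ind // fmap_w
    return np.array([cx_int + reg[0], cy_int + reg[1]], dtype=np.float32)

# e.g. with fmap_w = 128, ind = 2 * 128 + 98 = 354 and reg = [0.97667, 0.35667]:
# decode_center(354, [0.97667, 0.35667], 128) -> [98.97667, 2.35667]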
Example #28
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        # keep only annotations with valid category ids (the crowd filter below is commented out)
        #        anns = list(filter(lambda x:x['category_id'] in self._valid_ids and x['iscrowd']!= 1 , anns))
        anns = list(filter(lambda x: x['category_id'] in self._valid_ids,
                           anns))

        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        test_im = inp.copy()

        inp = (inp.astype(np.float32) / 255.)

        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        trans_seg_output = get_affine_transform(c, s, 0, [output_w, output_h])
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        seg = np.ones(
            (self.max_objs, output_h, output_w), dtype=np.float32) * 255
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        if num_objs > 0:
            iii = np.random.randint(0, num_objs)

        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            #ann['segmentation']['counts'] = ann['segmentation']['counts'].encode(encoding='UTF-8')
            if ann['segmentation'] is not None:
                segment = self.coco.annToMask(ann)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                if ann['segmentation'] is not None:
                    segment = segment[:, ::-1]
            '''
            if ann['segmentation']!=None and k == iii:
                seg_index = cv2.warpAffine(segment, trans_input,
                                     (input_w, input_h),
                                     flags=cv2.INTER_LINEAR)
                seg_index = seg_index > 0
                color = np.array([[255,0,0]])
                test_im[seg_index] = test_im[seg_index]*0.2 + color * 0.8
            '''

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            if ann['segmentation'] is not None:
                segment = cv2.warpAffine(segment,
                                         trans_seg_output,
                                         (output_w, output_h),
                                         flags=cv2.INTER_LINEAR)
                segment = segment.astype(np.float32)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                pad_rate = 0.1
                if ann['segmentation'] is not None:
                    segment_mask = np.ones_like(segment)
                    x, y = np.clip([ct[0] - (1 + pad_rate) * w / 2, ct[0] + (1 + pad_rate) * w / 2], 0, output_w - 1).astype(np.int64), \
                           np.clip([ct[1] - (1 + pad_rate) * h / 2, ct[1] + (1 + pad_rate) * h / 2], 0, output_h - 1).astype(np.int64)
                    segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                    segment[segment > 0] = 1
                    segment[segment_mask == 1] = 255
                    seg[k] = segment
                if ann['segmentation'] is not None:
                    pass
                    #cv2.rectangle(
                    #      segment, (bbox[0], bbox[1]), (bbox[0], bbox[1]), (255,0,0), 2)
                    #print(file_name.split('/')[-1])
                    #cv2.imwrite('/home/zhe.zhao/'+ file_name.split('/')[-1].split('.')[0]+str(k)+'.jpg',segment*255)
                    #cv2.imwrite('/home/zhe.zhao/0_'+ file_name.split('/')[-1].split('.')[0]+str(k)+'.jpg',test_im)
                    #cv2.waitKey(0)
                    #seg_mask[k] = segment_mask

                #print(np.sum(segment)/np.sum(segment_mask)) ## pos / neg

                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        #cv2.imwrite('/home/zhe.zhao/'+ file_name.split('/')[-1],test_im)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'seg': seg
        }

        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
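
# A minimal sketch of the per-object segmentation target built above: pixels inside the
# box padded by pad_rate keep their binarised mask value, everything outside is set to
# the ignore label 255 so the segmentation loss can skip it.
import numpy as np

def build_seg_target(segment, ct, w, h, output_w, output_h, pad_rate=0.1):
    seg_mask = np.ones_like(segment)
    x = np.clip([ct[0] - (1 + pad_rate) * w / 2, ct[0] + (1 + pad_rate) * w / 2],
                0, output_w - 1).astype(np.int64)
    y = np.clip([ct[1] - (1 + pad_rate) * h / 2, ct[1] + (1 + pad_rate) * h / 2],
                0, output_h - 1).astype(np.int64)
    seg_mask[y[0]:y[1], x[0]:x[1]] = 0          # keep the padded box region
    target = segment.copy()
    target[target > 0] = 1                      # binarise the warped mask
    target[seg_mask == 1] = 255                 # ignore everything outside the box
    return target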
Example #29
  def img_transform(self, img, anns, flip_en=True, scale_lv=2, out_shift=None, crop=None):
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = [img.shape[1], img.shape[0]]
      input_h, input_w = self.opt.input_h, self.opt.input_w
    
    crop = [0, 0, input_w, input_h] if crop is None else crop
    flipped = False
    rot_en = self.opt.rotate > 0
    rot = crpsh_x = crpsh_y =0
    img_s = [img.shape[1], img.shape[0]]
    
    if self.split == 'train':
      if scale_lv == 2:
        s = np.random.choice([ 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896])
      elif scale_lv == 1:
        s = np.random.choice([ 512, 576, 640, 704, 768, 832])
      else:
        s = np.random.choice([ 192, 256, 320, 384, 448, 512])
      
      distortion = 0.6
      sd = np.random.random()*distortion*2 - distortion + 1
      if img.shape[0] > img.shape[1]:
        s = [s, s*(img.shape[0] / img.shape[1])*sd]
      else:
        s = [s*(img.shape[1] / img.shape[0])*sd, s]

      crpsh_x = max( (s[0] - (crop[2]-crop[0])) / 2, (crop[2]-crop[0])*0.2)
      crpsh_y = max( (s[1] - (crop[3]-crop[1])) / 2, (crop[3]-crop[1])*0.2)

      if flip_en and np.random.random() < self.opt.flip:
        flipped = True
        img = img[:, ::-1, :]
      if rot_en:
        rot = np.random.random()*self.opt.rotate*2 - self.opt.rotate

    elif not self.opt.keep_res:
      s = np.array([input_w, input_h], dtype=np.float32)

    out_center = [input_w/2, input_h/2] if out_shift is None else out_shift
    out_center[0] += (np.random.random()*2-1) * crpsh_x
    out_center[1] += (np.random.random()*2-1) * crpsh_y
    
    trans_input = get_affine_transform(
      c, img_s, rot, s, out_center)
    trans_inv = get_affine_transform(
      c, img_s, rot, s, out_center, inv=1)
      
    inp = cv2.warpAffine(img, trans_input,
                         (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    
    num_objs = min(len(anns), self.max_objs)
    ann_list = []
    
    border_xy, border_idx = get_border_coord(trans_inv, width, height, crop)
    
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
          bbox[[0, 2]] = width - bbox[[2, 0]]
      bbox[:2] = affine_transform(bbox[:2], trans_input)
      bbox[2:] = affine_transform(bbox[2:], trans_input)
      segm  = ann['segmentation']

      # Create a bbox from the visible part of each object via its segmentation mask
      m = self.coco.annToMask(ann)
      bbox2 = mask2box(m, trans_input, border_xy, border_idx, 
                       flipped, width, height, crop)

      if rot_en:
        bbox = bbox2.astype(np.float32)
      ann_list.append([bbox, cls_id, bbox2])
      
      #end of objs loop
    meta = (c, s)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)
    
    return inp, ann_list, output_w, output_h, meta
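
# mask2box is not shown in this example; a simplified, hypothetical sketch of its core
# idea: warp the binary instance mask into the input frame and take the tight box of the
# visible (non-zero) part, ignoring the crop/border bookkeeping of the real helper.
import cv2
import numpy as np

def visible_box_from_mask(mask, trans_input, input_w, input_h, flipped=False):
    if flipped:
        mask = mask[:, ::-1]
    warped = cv2.warpAffine(mask.astype(np.uint8), trans_input,
                            (input_w, input_h), flags=cv2.INTER_NEAREST)
    ys, xs = np.nonzero(warped)
    if len(xs) == 0:
        return np.zeros(4, dtype=np.float32)
    return np.array([xs.min(), ys.min(), xs.max(), ys.max()], dtype=np.float32)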
Example #30
    def __getitem__(self, index):
        # index = 45236  # debug override (forces one fixed sample); disabled
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        assert os.path.exists(img_path), 'Image path does not exist: {}'.format(img_path)

        # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        target = self.coco.loadAnns(ids=ann_ids)

        # Separate out crowd annotations. These are annotations that signify a large crowd of
        # objects of said class, where there is no annotation for each individual object.
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]

        if len(target) > 0:
            # Pool all the masks for this image into one [num_objects, height, width] matrix
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)
            # Transpose to (height, width, num_objects) so the flip/warp
            # augmentations below index the width axis the same way as the image.
            masks = masks.transpose(1, 2, 0)
        else:
            # No non-crowd annotations: keep one empty channel so the flip/warp
            # code below still has a valid array to operate on.
            masks = np.zeros((height, width, 1), dtype=np.uint8)

        # labels = [int(self.cat_ids[obj['category_id']]) for obj in target]

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                masks = masks[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        if self.rgb:
            inp = inp[..., ::-1]
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        d1 = masks.shape[2]
        masks = cv2.warpAffine(masks, trans_input,
                               (input_w, input_h),
                               flags=cv2.INTER_LINEAR)
        masks = np.expand_dims(masks, 2) if masks.ndim != 3 else masks
        d2 = masks.shape[2]
        assert d1 == d2
        masks = masks.transpose(2, 0, 1)
        masks = (masks >= 0.5).astype(np.uint8)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        # centers = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        segm_masks = []
        gt_det = []
        num_objs = min(len(target), self.max_objs)
        for k in range(num_objs):
            ann = target[k]

            # convert bboxes to point_form (xmin, ymin, xmax, ymax)
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # After augmentation some masks will be empty.
            if h > 0 and w > 0 and masks[k].sum() > 0.0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                # centers[k] = ct_int[0], ct_int[1]
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                det = [ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, cls_id]
                gt_det.append(det)
                segm_masks.append(masks[k])

        if len(segm_masks) > 0:
            masks = np.stack(segm_masks)
            gt_det = np.stack(gt_det)

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
               'wh': wh, 'masks': masks, 'gt_bbox_lbl': gt_det}

        # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
        #        'masks': masks, 'centers': centers, 'gt_bbox_lbl': gt_det}

        # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
        #        'masks': masks, 'labels': labels, 'crowd': crowd, 'centers': centers, 'gt_bbox': gt_det}

        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
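For reference, the `ind`/`reg` pair built in the loop above encodes each object's center as a flat index into the `output_h * output_w` grid plus a fractional offset (`ind[k] = ct_int[1] * output_w + ct_int[0]`, `reg[k] = ct - ct_int`). The small decoder below is an illustrative sketch, not part of the dataset code; `decode_centers` is a hypothetical name and it assumes `reg` is present (i.e. `opt.reg_offset` is set):

import numpy as np

def decode_centers(ind, reg, reg_mask, output_w):
    # Recover sub-pixel object centers on the output grid from the
    # ind/reg encoding produced by __getitem__ above.
    centers = []
    for k in np.flatnonzero(reg_mask):
        x_int = ind[k] % output_w
        y_int = ind[k] // output_w
        centers.append((x_int + reg[k][0], y_int + reg[k][1]))
    return np.array(centers, dtype=np.float32).reshape(-1, 2)

# e.g. centers = decode_centers(ret['ind'], ret['reg'], ret['reg_mask'],
#                               input_w // self.opt.down_ratio)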