Example #1
0
    def __getitem__(self, index):
        """Build the extreme-point training sample for ``self.images[index]``.

        The image is read from disk, optionally augmented (random scale/crop
        and horizontal flip, plus colour jitter, all only when
        ``self.split == 'train'``), warped to ``opt.input_res`` and
        normalised with ``self.mean`` / ``self.std``.  For at most
        ``self.max_objs`` annotations, the four ``extreme_points`` (treated
        here as top, left, bottom, right) and the centre derived from them
        are drawn as Gaussians on ``opt.output_res`` sized heatmaps.

        Args:
            index: Position in ``self.images`` of the image to load.

        Returns:
            dict with 'input' and the heatmaps 'hm_t', 'hm_l', 'hm_b',
            'hm_r', 'hm_c'.  When ``opt.reg_offset`` is truthy it also holds
            'reg_mask', the sub-pixel offsets 'reg_t/l/b/r' and the flattened
            heatmap indices 'ind_t/l/b/r'.
        """
        opt = self.opt
        image_id = self.images[index]
        info = self.coco.loadImgs(ids=[image_id])[0]
        img = cv2.imread(os.path.join(self.img_dir, info['file_name']))

        img_h, img_w = img.shape[0], img.shape[1]
        c = np.array([img_w / 2., img_h / 2.])
        s = max(img_h, img_w) * 1.0

        flipped = False
        if self.split == 'train':
            if opt.not_rand_crop:
                # Jitter scale and centre with clipped Gaussian noise.
                sf = opt.scale
                cf = opt.shift
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                c[0] += img_w * np.clip(
                    np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += img_h * np.clip(
                    np.random.randn() * cf, -2 * cf, 2 * cf)
            else:
                # Random scale plus a random crop centre inside the borders.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img_w)
                h_border = self._get_border(128, img_h)
                c[0] = np.random.randint(low=w_border, high=img_w - w_border)
                c[1] = np.random.randint(low=h_border, high=img_h - h_border)
            if np.random.random() < opt.flip:
                flipped = True
                img = img[:, ::-1, :]

        in_res = opt.input_res
        trans_input = get_affine_transform(c, s, 0, [in_res, in_res])
        inp = cv2.warpAffine(
            img, trans_input, (in_res, in_res), flags=cv2.INTER_LINEAR)
        inp = inp.astype(np.float32) / 255.
        if self.split == 'train' and not opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

        output_res = opt.output_res
        num_classes = opt.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        num_hm = 1 if opt.agnostic_ex else num_classes

        # Slots 0..3 of every list below are top, left, bottom, right.
        hm_ex = [
            np.zeros((num_hm, output_res, output_res), dtype=np.float32)
            for _ in range(4)
        ]
        hm_c = np.zeros((num_classes, output_res, output_res),
                        dtype=np.float32)
        reg_ex = [
            np.zeros((self.max_objs, 2), dtype=np.float32) for _ in range(4)
        ]
        ind_ex = [
            np.zeros((self.max_objs), dtype=np.int64) for _ in range(4)
        ]
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[image_id]))
        num_objs = min(len(anns), self.max_objs)
        if opt.mse_loss:
            draw_gaussian = draw_msra_gaussian
        else:
            draw_gaussian = draw_umich_gaussian

        for k, ann in zip(range(num_objs), anns):
            pts = np.array(ann['extreme_points'],
                           dtype=np.float32).reshape(4, 2)
            cls_id = int(self.cat_ids[ann['category_id']])
            hm_id = 0 if opt.agnostic_ex else cls_id
            if flipped:
                # Mirror x and swap the left/right extreme points.
                pts[:, 0] = img_w - pts[:, 0] - 1
                pts[[1, 3]] = pts[[3, 1]]
            for j, pt in enumerate(pts):
                pts[j] = affine_transform(pt, trans_output)
            pts = np.clip(pts, 0, opt.output_res - 1)

            h = pts[2, 1] - pts[0, 1]
            w = pts[3, 0] - pts[1, 0]
            if not (h > 0 and w > 0):
                continue  # degenerate after warping/clipping

            radius = max(
                0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
            pt_int = pts.astype(np.int32)
            for e in range(4):
                draw_gaussian(hm_ex[e][hm_id], pt_int[e], radius)
            for e in range(4):
                reg_ex[e][k] = pts[e] - pt_int[e]
            for e in range(4):
                ind_ex[e][k] = pt_int[e, 1] * output_res + pt_int[e, 0]

            # Centre: midpoint of left/right in x and of top/bottom in y.
            ct = [
                int((pts[3, 0] + pts[1, 0]) / 2),
                int((pts[0, 1] + pts[2, 1]) / 2)
            ]
            draw_gaussian(hm_c[cls_id], ct, radius)
            reg_mask[k] = 1

        hm_t, hm_l, hm_b, hm_r = hm_ex
        ret = {
            'input': inp,
            'hm_t': hm_t,
            'hm_l': hm_l,
            'hm_b': hm_b,
            'hm_r': hm_r,
            'hm_c': hm_c
        }
        if opt.reg_offset:
            reg_t, reg_l, reg_b, reg_r = reg_ex
            ind_t, ind_l, ind_b, ind_r = ind_ex
            ret.update({
                'reg_mask': reg_mask,
                'reg_t': reg_t,
                'reg_l': reg_l,
                'reg_b': reg_b,
                'reg_r': reg_r,
                'ind_t': ind_t,
                'ind_l': ind_l,
                'ind_b': ind_b,
                'ind_r': ind_r
            })

        return ret
Example #2
0
  def __getitem__(self, index):
    """Build the detection training sample for ``self.images[index]``.

    Reads the image and its annotations, applies the train-time
    augmentations (random scale/crop or scale/shift jitter, horizontal flip,
    colour jitter), warps the image to the network input size and rasterises
    every valid box into the class heatmap.  For each box a Gaussian is drawn
    at the box centre and at its top-left and bottom-right corners, all on
    the same class channel.

    Args:
      index: Position in ``self.images`` of the image to load.

    Returns:
      dict with 'input', 'hm', 'reg_mask', 'ind' and one of 'wh',
      'dense_wh' (+ 'dense_wh_mask') or 'cat_spec_wh' (+ 'cat_spec_mask')
      depending on the options; 'reg' when ``opt.reg_offset`` is set and
      'meta' when debugging or when the split is not 'train'.
    """
    opt = self.opt
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[img_id]))
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(os.path.join(self.img_dir, file_name))
    height, width = img.shape[:2]

    c = np.array([width / 2., height / 2.], dtype=np.float32)
    if opt.keep_res:
      # Round each side up via bit-or with the pad mask, then add one.
      input_h = (height | opt.pad) + 1
      input_w = (width | opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(height, width) * 1.0
      input_h, input_w = opt.input_h, opt.input_w

    flipped = False
    if self.split == 'train':
      if opt.not_rand_crop:
        sf = opt.scale
        cf = opt.shift
        c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
      else:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, width)
        h_border = self._get_border(128, height)
        c[0] = np.random.randint(low=w_border, high=width - w_border)
        c[1] = np.random.randint(low=h_border, high=height - h_border)

      if np.random.random() < opt.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] = width - c[0] - 1

    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(
      img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = inp.astype(np.float32) / 255.
    if self.split == 'train' and not opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = ((inp - self.mean) / self.std).transpose(2, 0, 1)

    output_h = input_h // opt.down_ratio
    output_w = input_w // opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    max_objs = self.max_objs
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((max_objs, 2), dtype=np.float32)
    ind = np.zeros((max_objs), dtype=np.int64)
    reg_mask = np.zeros((max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

    if opt.mse_loss:
      draw_gaussian = draw_msra_gaussian
    else:
      draw_gaussian = draw_umich_gaussian

    gt_det = []
    for k, ann in zip(range(num_objs), anns):
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
      h = bbox[3] - bbox[1]
      w = bbox[2] - bbox[0]
      if not (h > 0 and w > 0):
        continue  # box vanished after warping/clipping

      radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
      if opt.mse_loss:
        radius = opt.hm_gauss

      ct = np.array(
        [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
      ct_int = ct.astype(np.int32)
      lt_int = np.array([bbox[0], bbox[1]], dtype=np.float32).astype(np.int32)
      rb_int = np.array([bbox[2], bbox[3]], dtype=np.float32).astype(np.int32)

      # Centre, top-left and bottom-right all go on the same class channel.
      for point in (ct_int, lt_int, rb_int):
        draw_gaussian(hm[cls_id], point, radius)

      wh[k] = 1. * w, 1. * h
      ind[k] = ct_int[1] * output_w + ct_int[0]
      reg[k] = ct - ct_int
      reg_mask[k] = 1
      col = cls_id * 2
      cat_spec_wh[k, col: col + 2] = wh[k]
      cat_spec_mask[k, col: col + 2] = 1
      if opt.dense_wh:
        draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
      gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                     ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      ret['dense_wh'] = dense_wh
      ret['dense_wh_mask'] = np.concatenate([hm_a, hm_a], axis=0)
      del ret['wh']
    elif opt.cat_spec_wh:
      ret['cat_spec_wh'] = cat_spec_wh
      ret['cat_spec_mask'] = cat_spec_mask
      del ret['wh']
    if opt.reg_offset:
      ret['reg'] = reg
    if opt.debug > 0 or self.split != 'train':
      if gt_det:
        gt_det = np.array(gt_det, dtype=np.float32)
      else:
        gt_det = np.zeros((1, 6), dtype=np.float32)
      ret['meta'] = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
    return ret
Example #3
0
    def __getitem__(self, index):
        """Build the contour/sparse-code training sample for one image.

        For every usable annotation the largest polygon is resampled to
        ``self.n_vertices`` points, clipped to its ground-truth box and kept
        together with the tight box around the resampled contour.  The image
        is then augmented (train split only), warped to ``self.img_size`` and
        normalised, and each object is projected onto the
        ``self.fmap_size`` grid where its centre heatmap, box size, centre
        offset, normalised contour and sparse codes are recorded.

        At most ``self.max_objs`` objects are encoded; any further ones are
        ignored so that the fixed-size target arrays are never overrun.

        Args:
            index: Position in ``self.images`` of the image to load.

        Returns:
            dict with keys 'image', 'shapes', 'codes', 'offsets', 'active',
            'hmap', 'w_h_', 'regs', 'inds', 'ind_masks', 'c', 's', 'img_id'.
        """
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            segmentation = anno['segmentation']
            # Skip crowd objects and segmentations that are not polygon lists.
            if anno['iscrowd'] == 1 or not isinstance(segmentation, list):
                continue

            if len(segmentation) > 1:
                # Several polygons: keep only the one with the largest area.
                obj_contours = [
                    np.array(part).reshape((-1, 2)).astype(np.int32)
                    for part in segmentation
                ]
                obj_contours = sorted(obj_contours, key=cv2.contourArea)
                polygons = obj_contours[-1]
            else:
                polygons = segmentation[0]

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Ignore contours whose area is too small to be meaningful.
            if cv2.contourArea(contour.astype(np.int32)) < 6:
                continue

            # Resample the contour to a fixed number of vertices and keep it
            # inside the annotated bounding box.
            fixed_contour = uniformsample(contour, self.n_vertices)
            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)

            # Reject degenerate shapes.  NaN never compares equal to itself,
            # so finiteness has to be tested with np.isfinite.
            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            if not np.isfinite(contour_std) or contour_std < 1e-6:
                continue

            updated_bbox = [
                np.min(fixed_contour[:, 0]),
                np.min(fixed_contour[:, 1]),
                np.max(fixed_contour[:, 0]),
                np.max(fixed_contour[:, 1])
            ]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Placeholder object; its zero-sized box is skipped further down.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(160, width)
            h_border = get_border(160, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        fmap_w = self.fmap_size['w']
        fmap_h = self.fmap_size['h']
        trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

        hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                        dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height of bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2),
                           dtype=np.float32)  # normalised gt polygons
        center_offsets = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # gt mass centers relative to bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)
        activated_codes = np.zeros(
            (self.max_objs, self.n_codes),
            dtype=np.int64)  # 1 where the corresponding code is active

        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # sub-pixel offset of the bbox center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # Only the first max_objs objects fit into the target arrays.
        limit = self.max_objs
        objects = zip(bboxes[:limit], labels[:limit], shapes[:limit])
        for k, (bbox, label, shape) in enumerate(objects):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Mirror the x coordinate of every contour vertex.
                shape[0::2] = width - shape[0::2] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Drop boxes that collapsed (or became NaN) on the feature map
            # before doing any of the per-contour work.
            if not (h >= 1e-6 and w >= 1e-6):
                continue

            # Apply the same scale-and-crop transform to every vertex.
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, fmap_w - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, fmap_h - 1)

            # Bring the contour into the orientation that
            # check_clockwise_polygon accepts.
            if check_clockwise_polygon(shape_clipped):
                fixed_contour = shape_clipped.copy()
            else:
                fixed_contour = np.flip(shape_clipped, axis=0)

            # Start the vertex sequence at the left-most vertex (argmin x).
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            norm_shape = (indexed_shape - mass_center) / np.array(
                [w / 2., h / 2.])

            obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                              (bbox[1] + bbox[3]) / 2],
                             dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            radius = max(
                0,
                int(
                    gaussian_radius((math.ceil(h), math.ceil(w)),
                                    self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = norm_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(norm_shape.reshape((1, -1)),
                                     self.dictionary,
                                     lmbda=self.sparse_alpha,
                                     max_iter=60)
            activated_codes[k] = (np.abs(codes_[k]) >
                                  1e-4) * 1  # active codes annotated 1

            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
            ind_masks[k] = 1

        return {
            'image': img,
            'shapes': shapes_,
            'codes': codes_,
            'offsets': center_offsets,
            'active': activated_codes,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
Example #4
0
    def __get_default_coco(self, img, anns, num_objs):
        """Turn one image and its annotations into detection targets.

        Boxes (and keypoints when ``self.num_keypoints > 0``) are wrapped in
        imgaug containers, passed through ``self.augmentation`` (if set) and
        ``self.resize``, then rescaled to the output grid with
        ``self.resize_out`` and encoded as heatmap / regression targets.

        Args:
            img: Image array; its ``shape`` is used as the annotation frame.
            anns: Sequence of annotation dicts with at least 'bbox' and
                'category_id'.  A missing 'keypoints' entry is filled in
                place with zeros when keypoints are enabled.
            num_objs: Number of leading entries of ``anns`` to encode.

        Returns:
            dict with 'input', 'hm', 'reg_mask', 'ind', 'wh', 'reg',
            'gt_dets', 'gt_areas' and, when keypoints are enabled, 'kps',
            'gt_kps' and 'kp_reg_mask'.
        """
        num_kpts = self.num_keypoints
        with_kpts = num_kpts > 0

        boxes = []
        kpts = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            boxes.append(BoundingBox(*bbox))

            if with_kpts:
                if 'keypoints' not in ann:
                    ann['keypoints'] = np.zeros((3 * num_kpts, ))

                # Keep only x and y; the third value is read again later.
                kpts.extend(
                    Keypoint(*xy)
                    for xy in np.array(ann['keypoints']).reshape(-1, 3)[:, :2])

        bbs = BoundingBoxesOnImage(boxes, shape=img.shape)
        if with_kpts:
            kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            if with_kpts:
                img_aug, bbs_aug, kpts_aug = self.augmentation(
                    image=img, bounding_boxes=bbs, keypoints=kpts)
            else:
                img_aug, bbs_aug = self.augmentation(image=img,
                                                     bounding_boxes=bbs)
        else:
            if with_kpts:
                kpts_aug = kpts.copy()
            img_aug, bbs_aug = np.copy(img), bbs.copy()

        if with_kpts:
            img_aug, bbs_aug, kpts_aug = self.resize(image=img_aug,
                                                     bounding_boxes=bbs_aug,
                                                     keypoints=kpts_aug)
        else:
            img_aug, bbs_aug = self.resize(image=img_aug,
                                           bounding_boxes=bbs_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        # imgaug interprets shape tuples as (height, width).
        output_shape = (output_h, output_w)
        num_classes = self.num_classes
        max_dets = self.max_detections

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((max_dets, 2), dtype=np.float32)
        reg = np.zeros((max_dets, 2), dtype=np.float32)
        ind = np.zeros((max_dets), dtype=np.int64)
        reg_mask = np.zeros((max_dets), dtype=np.uint8)
        gt_det = np.zeros((max_dets, 6), dtype=np.float32)
        gt_areas = np.zeros((max_dets), dtype=np.float32)

        if with_kpts:
            kp = np.zeros((max_dets, num_kpts * 2), dtype=np.float32)
            gt_kp = np.zeros((max_dets, num_kpts, 2), dtype=np.float32)
            kp_reg_mask = np.zeros((max_dets, num_kpts * 2), dtype=np.uint8)

            bbs_aug, kpts_aug = self.resize_out(bounding_boxes=bbs_aug,
                                                keypoints=kpts_aug)
        else:
            bbs_aug = self.resize_out(bounding_boxes=bbs_aug)

        for k in range(num_objs):
            ann = anns[k]
            bbox_aug = bbs_aug[k].clip_out_of_image(output_shape)
            bbox = np.array(
                [bbox_aug.x1, bbox_aug.y1, bbox_aug.x2, bbox_aug.y2])

            cls_id = int(self.cat_mapping[ann['category_id']])

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue  # box vanished after augmentation/clipping

            radius = gaussian_radius((np.ceil(h), np.ceil(w)))
            radius = max(0, int(radius))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            gt_det[k] = ([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

            if with_kpts:
                # Third value of every keypoint triple; only 2 counts as
                # usable for regression.
                valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                first = k * num_kpts
                for i, p in enumerate(kpts_aug[first:first + num_kpts]):
                    kp[k][i * 2] = p.x - ct_int[0]
                    kp[k][i * 2 + 1] = p.y - ct_int[1]

                    is_valid = valid[i] == 2 and not p.is_out_of_image(
                        output_shape)
                    kp_reg_mask[k, i * 2] = int(is_valid)
                    kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                    gt_kp[k][i] = p.x, p.y

            if "area" not in ann:
                gt_areas[k] = w * h
            else:
                gt_areas[k] = ann["area"]

        # Keep at least one (all-zero) row when max_detections is 0.
        if len(gt_det) == 0:
            gt_det = np.zeros((1, 6), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        if with_kpts:
            ret['kps'] = kp
            ret['gt_kps'] = gt_kp
            ret['kp_reg_mask'] = kp_reg_mask

        return ret
Example #5
0
  def __getitem__(self, index):
  
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)
    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
        
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    s = max(img.shape[0], img.shape[1]) * 1.0
    rot = 0

    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
            #TODO这里是更改多尺度训练的地方。
        s = s#* np.random.choice(np.arange(0.8, 1.5, 0.1))#change 0.6 1.4
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift
        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
      if np.random.random() < self.opt.aug_rot:
        rf = self.opt.rotate
        rot = np.clip(np.random.randn()*rf, -rf*2, rf*2)
        
      if self.opt.angle_norm and self.split == 'train': 
          #首先是读取标注文件,获得中心点和头部点获得所有角度的集合 
        angle_list=[]
        for k in range(num_objs):
          ann = anns[k]
          bbox = self._coco_box_to_bbox(ann['bbox'])
          pts = np.array(ann['keypoints'][0:3], np.float32).reshape( self.num_joints, 3)#tmjx
          ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
          angle= math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
          angle_list.append(angle)

        #下面这段代码求旋转的角度
        angle_list=np.array(angle_list)%np.pi #首先归一化到np.pi
        angle_int=(angle_list// (np.pi/9)).astype('int')
        angle_b=np.bincount(angle_int)
        index_rot=np.argmax(angle_b)
        ind_rot=(angle_list>(index_rot)*np.pi/9) *  (angle_list<=(index_rot+1)*np.pi/9)
        angle_rot=np.average(angle_list[ind_rot])
        rot=angle_rot*(-180)/np.pi
        
        
      if np.random.random() < self.opt.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1
        
    trans_input = get_affine_transform(
      c, s, rot, [self.opt.input_res, self.opt.input_res])

    inp = cv2.warpAffine(img, trans_input, 
                         (self.opt.input_res, self.opt.input_res),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_res = self.opt.output_res
    num_joints = self.num_joints
    trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
    trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

    hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
    hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
    dense_kps = np.zeros((num_joints, 2, output_res, output_res), 
                          dtype=np.float32)
    dense_kps_mask = np.zeros((num_joints, output_res, output_res), 
                               dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
    hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
    hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
    hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      #TODO change wwlekeuihx  
      cls_id = int(ann['category_id']) - 1
      pts = np.array(ann['keypoints'][0:3], np.float32).reshape(num_joints, 3)#tmjx
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        pts[:, 0] = width - pts[:, 0] - 1
        #for e in self.flip_idx:
          #pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
      # bbox[:2] = affine_transform(bbox[:2], trans_output)
      # bbox[2:] = affine_transform(bbox[2:], trans_output)
      #bbox = np.clip(bbox, 0, output_res - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      center_obj=[(bbox[2] + bbox[0])/2,(bbox[3] + bbox[1])/2]
      center_obj=affine_transform(center_obj, trans_output_rot)
      scale_trans= self.opt.output_res/s
      h *= scale_trans
      w *= scale_trans
      h = np.clip(h , 0,  output_res - 1)
      w = np.clip(w , 0, output_res - 1)
      if (h > 0 and w > 0) or (rot != 0):
        radius = gaussian_radius((math.ceil(h), math.ceil(w))) *1.2
        sqrt_wh = np.sqrt(np.sqrt(h*w))
        radius_w = radius * np.sqrt(w) / sqrt_wh
        radius_h = radius * np.sqrt(h) / sqrt_wh
        radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(0, np.ceil(radius_w)) 
        radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(0, np.ceil(radius_h)) 
        # radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius)) 
        ct = np.array( center_obj, dtype=np.float32)
        # if  self.opt.Rguass: 
        if ct[0]<0 or ct[0]>output_res - 1 or ct[1]<0 or ct[1]>output_res - 1: #
              continue
        ct[0] = np.clip(ct[0], 0, output_res - 1)
        ct[1] = np.clip(ct[1], 0, output_res - 1)
        ct_int = ct.astype(np.int32)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_res + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        num_kpts = pts[:, 2].sum()
        if num_kpts == 0:
          hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
          reg_mask[k] = 0

        hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        hp_radius = self.opt.hm_gauss \
                    if self.opt.mse_loss else max(0, int(hp_radius)) 
        for j in range(num_joints):
          if pts[j, 2] > 0:
            pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
            if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
               pts[j, 1] >= 0 and pts[j, 1] < output_res:
              kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
              kps_mask[k, j * 2: j * 2 + 2] = 1
              pt_int = pts[j, :2].astype(np.int32)
              hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
              hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]
              hp_mask[k * num_joints + j] = 1
              if self.opt.dense_hp:
                # must be before draw center hm gaussian
                draw_dense_reg(dense_kps[j], hm[cls_id], ct_int, 
                               pts[j, :2] - ct_int, radius, is_offset=True)
                draw_gaussian(dense_kps_mask[j], ct_int, radius)
              draw_gaussian(hm_hp[j], pt_int, hp_radius)
        #TODO change
        angle= math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
        if  self.opt.Rguass:
              draw_gaussian(hm[cls_id], ct_int, [radius_w,radius_h,angle])
        else:
              radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius)) 
              draw_gaussian(hm[cls_id], ct_int, radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 
                       ct[0] + w / 2, ct[1] + h / 2, 1] + 
                       pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
    # if rot != 0:
    #   hm = hm * 0 + 0.9999
    #   reg_mask *= 0
    #   kps_mask *= 0

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
           'hps': kps, 'hps_mask': kps_mask}
    
    if self.opt.dense_hp:
      dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
      dense_kps_mask = dense_kps_mask.reshape(
        num_joints, 1, output_res, output_res)
      dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
      dense_kps_mask = dense_kps_mask.reshape(
        num_joints * 2, output_res, output_res)
      ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
      del ret['hps'], ret['hps_mask']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.hm_hp:
      ret.update({'hm_hp': hm_hp})
    if self.opt.reg_hp_offset:
      ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 40), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta
    # Debug helper: visualise the generated feature maps (currently disabled).
    # debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
    #                     theme=self.opt.debugger_theme)
    # inp1 = inp.transpose(1,2,0)
    # inp1=(inp1*self.std + self.mean)*255.
    # self.debug(debugger, inp1,  ret)
    return ret
Example #6
0
    def __getitem__(self, index):
        """Build one human-object-interaction training sample.

        Loads the image referenced by ``self.ids[index]``, applies the
        training-only augmentation (random crop/scale or shift/scale jitter,
        horizontal flip, colour jitter) and renders the targets on the
        down-sampled output grid.

        Args:
            index: Position of the sample inside ``self.ids``.

        Returns:
            dict with the keys ``input`` (normalised, channels-first image),
            ``hm``, ``hm_human`` and ``hm_rel`` (centre heat-maps; class 0 is
            drawn on ``hm_human``, every other class on ``hm``, interaction
            points on ``hm_rel``), ``wh``, ``ind``, ``reg_mask`` (per-object
            size, flat centre index and validity mask), ``sub_offset``,
            ``obj_offset``, ``offset_mask``, ``rel_ind`` (per-relation
            interaction point minus subject/object centre, validity mask and
            flat index) and, when ``self.opt.reg_offset`` is set, ``reg``
            (sub-pixel centre offset).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.ids[index]
        record = self.hoi_annotations[img_id]

        img_path = os.path.join(self.root, self.image_dir, record['file_name'])
        anns = record['annotations']
        hoi_anns = record['hoi_annotation']
        num_objs = min(len(anns), self.max_objs)
        num_rels = min(len(hoi_anns), self.max_rels)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: {}'.format(img_path))

        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        if self.opt.keep_res:
            # Round each side up to the next multiple of (pad + 1).
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(height, width) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.7, 1.4, 0.1))
                w_border = self._get_border(128, width)
                h_border = self._get_border(128, height)
                c[0] = np.random.randint(low=w_border, high=width - w_border)
                c[1] = np.random.randint(low=h_border, high=height - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                # Mirror the crop centre together with the image.
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        hm_human = np.zeros((1, output_h, output_w), dtype=np.float32)
        hm_rel = np.zeros((self.num_classes_verb, output_h, output_w),
                          dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        sub_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
        obj_offset = np.zeros((self.max_rels, 2), dtype=np.float32)
        offset_mask = np.zeros((self.max_rels), dtype=np.uint8)
        rel_ind = np.zeros((self.max_rels), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        # Integer centre of every processed object, indexed like ``anns``;
        # the relation loop below looks subjects/objects up in this list.
        bbox_ct = []
        for k in range(num_objs):
            ann = anns[k]
            # Float copy: an integer box would silently truncate the
            # transformed coordinates, and an ndarray box would otherwise be
            # modified in place inside the annotation store.
            bbox = np.array(ann['bbox'], dtype=np.float32)
            if isinstance(ann['category_id'], str):
                ann['category_id'] = int(ann['category_id'].replace('\n', ''))
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            bbox_ct.append(ct_int.tolist())
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                if cls_id == 0:
                    draw_gaussian(hm_human[cls_id], ct_int, radius)
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)

        for k in range(num_rels):
            hoi = hoi_anns[k]
            if isinstance(hoi['category_id'], str):
                hoi['category_id'] = int(hoi['category_id'].replace('\n', ''))
            hoi_cate = int(self.cat_ids_verb[hoi['category_id']])
            try:
                sub_ct = bbox_ct[hoi['subject_id']]
                obj_ct = bbox_ct[hoi['object_id']]
            except IndexError:
                # The relation refers to an object that was cut off by
                # ``max_objs``; leave its slot masked out instead of crashing.
                continue
            offset_mask[k] = 1
            rel_ct = np.array([(sub_ct[0] + obj_ct[0]) / 2,
                               (sub_ct[1] + obj_ct[1]) / 2],
                              dtype=np.float32)
            radius = gaussian_radius((math.ceil(abs(sub_ct[0] - obj_ct[0])),
                                      math.ceil(abs(sub_ct[1] - obj_ct[1]))))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            rel_ct_int = rel_ct.astype(np.int32)
            draw_gaussian(hm_rel[hoi_cate], rel_ct_int, radius)
            sub_offset[k] = (rel_ct_int[0] - sub_ct[0],
                             rel_ct_int[1] - sub_ct[1])
            obj_offset[k] = (rel_ct_int[0] - obj_ct[0],
                             rel_ct_int[1] - obj_ct[1])
            rel_ind[k] = rel_ct_int[1] * output_w + rel_ct_int[0]

        ret = {
            'input': inp,
            'hm': hm,
            'hm_human': hm_human,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hm_rel': hm_rel,
            'sub_offset': sub_offset,
            'obj_offset': obj_offset,
            'offset_mask': offset_mask,
            'rel_ind': rel_ind
        }
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        return ret
Example #7
0
    def __getitem__(self, index):
        """Build one sample with centre heat-map and 3D box regression targets.

        Reads the image and annotations of ``self.images[index]``, keeps the
        annotations whose first 2D keypoint lies strictly inside the
        normalised image range (0, 1), runs ``self.augs`` on the image and
        those keypoints, optionally applies a random zoom (training only) and
        renders the targets on the down-sampled output grid.

        Args:
            index: Position of the sample inside ``self.images``.

        Returns:
            dict with the keys ``input`` (normalised, channels-first image),
            ``hm`` (per-class centre heat-map), ``reg_mask``, ``ind``,
            ``dim`` (annotation ``scale``), ``rot`` (annotation ``rot``),
            ``loc`` (annotation ``translation``) and, when
            ``self.opt.reg_offset`` is set, ``reg`` (sub-pixel centre offset).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.images[index]
        video_info = self.coco.loadImgs(ids=[img_id])[0]
        file_name = video_info['file_name']
        image_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        input_h, input_w = self.opt.input_h, self.opt.input_w

        if anns:
            # Every 9th keypoint, (x, y) only -- presumably the box centre of
            # each instance; verify against the annotation format.
            centers = np.array(
                [ann['keypoints_2d'] for ann in anns])[:, 0::9, :2]
            centers = centers.reshape(-1, 2)
        else:
            # An empty list would otherwise produce a 1-D array and make the
            # 3-D indexing above raise IndexError.
            centers = np.zeros((0, 2), dtype=np.float64)
        # Keep only centres strictly inside the normalised image.
        keep = np.where(np.all((0 < centers) & (1 > centers), axis=1))
        centers = centers[keep]
        anns = [anns[i] for i in keep[0]]

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: {}'.format(image_path))

        # resize, pad, and color augs (centres converted to pixels first)
        centers[:, 0], centers[:, 1] = \
            centers[:, 0] * img.shape[1], centers[:, 1] * img.shape[0]
        augmented = self.augs(image=img, keypoints=centers)
        inp, centers = augmented['image'], np.array(augmented['keypoints'])
        num_objs = min(len(centers), self.max_objs)
        wh_ratio = img.shape[1] / img.shape[0]
        c = np.array([inp.shape[1] / 2., inp.shape[0] / 2.], dtype=np.float32)
        s = max(inp.shape[0], inp.shape[1]) * 1.0

        aug = False
        if self.split == 'train' and np.random.random() < self.opt.aug_ddd \
                and num_objs > 0:
            aug = True
            sf = self.opt.scale
            scale_rand = np.random.random()
            s = s * np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)

            trans_input = get_affine_transform(
                c, s, 0, [input_w, input_h])
            inp = cv2.warpAffine(inp, trans_input,
                                 (input_w, input_h),
                                 flags=cv2.INTER_LINEAR)

            # Move the centres with the image: homogeneous coords x matrix.
            centers = np.concatenate(
                [centers, np.ones((centers.shape[0], 1))], axis=1)
            centers = np.matmul(trans_input, centers.T).T

        if num_objs > 0:
            # Back to normalised coordinates relative to the network input.
            centers[:, 0], centers[:, 1] = \
                centers[:, 0] / inp.shape[1], centers[:, 1] / inp.shape[0]

        inp = (inp.astype(np.float32) / 255.)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio

        # empty targets
        heat_map = np.zeros([self.num_classes, output_h, output_w],
                            dtype=np.float32)
        scales = np.zeros([self.max_objs, 3], dtype=np.float32)
        translations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotvecs = np.zeros([self.max_objs, 3], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)

        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(ann['bbox'])
            scale = np.array(ann['scale'])
            rot_angles = np.array(ann['rot'])
            translation = np.array(ann['translation'])

            if aug:
                # Scale the third translation component by the same factor
                # that was applied to the image scale above.
                translation[2] *= np.clip(scale_rand * sf + 1, 1 - sf, 1 + sf)

            # ct[0] is x (it feeds the column term of ``ind`` below), so it is
            # scaled and clipped by the output width; ct[1] is y and uses the
            # output height.
            ct = centers[k][:2]
            ct[0] = np.clip(ct[0] * output_w, 0, output_w - 1)
            ct[1] = np.clip(ct[1] * output_h, 0, output_h - 1)

            cls_id = int(self.cat_ids[ann['category_id']])

            bbox[[0, 2]] *= output_w
            bbox[[1, 3]] *= output_h

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius / 2))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct_int = ct.astype(np.int32)
                draw_umich_gaussian(heat_map[cls_id], ct_int, radius)
                scales[k] = scale
                translations[k] = translation
                rotvecs[k] = rot_angles

                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                if DEBUG:
                    lines = (
                        [1, 5], [2, 6], [3, 7], [4, 8],  # lines along x-axis
                        [1, 3], [5, 7], [2, 4], [6, 8],  # lines along y-axis
                        [1, 2], [3, 4], [5, 6], [7, 8]  # lines along z-axis
                    )

                    plt.scatter(ct_int[0], ct_int[1])
                    r = R.from_euler('zyx', rot_angles).as_matrix()

                    box_3d = Box.from_transformation(
                        r, translation, scale).vertices
                    points_2d = project_points(
                        box_3d, np.array(video_info['projection_matrix']))
                    points_2d[:, 0] = points_2d[:, 0] * (128 * wh_ratio) \
                        + 128 * (1 - wh_ratio) / 2
                    points_2d[:, 1] *= 128
                    points_2d = points_2d.astype(int)
                    for ids in lines:
                        plt.plot(
                            (points_2d[ids[0]][0], points_2d[ids[1]][0]),
                            (points_2d[ids[0]][1], points_2d[ids[1]][1]),
                            color='r',
                        )

        ret = {
            'input': inp,
            'hm': heat_map,
            'reg_mask': reg_mask,
            'ind': ind,
            'dim': scales,
            'rot': rotvecs,
            'loc': translations
        }

        if self.opt.reg_offset:
            ret.update({'reg': reg})

        if DEBUG:
            if inp.shape[0] == 3:
                plot_img = inp.transpose(1, 2, 0)
                plot_img = (plot_img * self.std) + self.mean
            else:
                plot_img = inp.copy()

            plot_img = cv2.resize(plot_img, (output_w, output_h))
            plot_img = cv2.cvtColor(plot_img, cv2.COLOR_BGR2RGB)
            plt.imshow(plot_img)
            plt.show()
            plt.imshow(heat_map[0])
            plt.show()

        return ret
Example #8
0
    def __getitem__(self, index):
        """Build one multi-person pose sample (centre, box and keypoint targets).

        Reads the image and annotations of ``self.images[index]``, applies
        the training-only augmentation (random crop/scale or shift/scale
        jitter, rotation, horizontal flip, colour jitter) and renders the
        targets on a square ``output_res`` grid.

        Side effect: with probability ``self.opts.keep_inp_res_prob`` (and
        when the padded image is smaller than 1024) this method overwrites
        ``self.opts.input_h/input_w/output_h/output_w/input_res/output_res``;
        the new values stay in effect for later calls.

        Args:
            index: Position of the sample inside ``self.images``.

        Returns:
            dict with the keys ``input``, ``hm``, ``reg_mask``, ``ind``,
            ``wh`` and either ``hps``/``hps_mask`` or, when
            ``self.opts.dense_hp`` is set, ``dense_hps``/``dense_hps_mask``.
            Depending on the options it also carries ``reg``, ``hm_hp``,
            ``hp_offset``/``hp_ind``/``hp_mask`` and ``meta``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.images[index]
        # loadImgs(ids=[img_id]) returns a list whose length is 1.
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        cropped = False
        if self.split == 'train':
            # NOTE(review): np.random.random() is always < 1, so training
            # always reads the '<name>crop.jpg' variant as written.
            if np.random.random() < 1:
                cropped = True
                file_name = file_name.split('.')[0] + 'crop.jpg'
                img_path = os.path.join(self.img_dir, file_name)
        if self.split == 'val':
            cropped = True

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: {}'.format(img_path))
        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(height, width) * 1.0
        rot = 0

        flipped = False
        rotted = False
        if self.split == 'train':
            if not self.opts.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self.get_border(128, width)
                h_border = self.get_border(128, height)
                c[0] = np.random.randint(low=w_border, high=width - w_border)
                c[1] = np.random.randint(low=h_border, high=height - h_border)
            else:
                sf = self.opts.scale
                cf = self.opts.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opts.aug_rot:
                rotted = True
                rf = self.opts.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opts.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # input_res is max(input_h, input_w). Occasionally switch the network
        # resolution to the image size rounded up to a multiple of 128.
        # NOTE(review): this mutates the shared ``self.opts`` object.
        if np.random.random() < self.opts.keep_inp_res_prob and max(
                (height | 127) + 1, (width | 127) + 1) < 1024:
            self.opts.input_h = (height | 127) + 1
            self.opts.input_w = (width | 127) + 1
            self.opts.output_h = self.opts.input_h // self.opts.down_ratio
            self.opts.output_w = self.opts.input_w // self.opts.down_ratio
            self.opts.input_res = max(self.opts.input_h, self.opts.input_w)
            self.opts.output_res = max(self.opts.output_h, self.opts.output_w)

        trans_input = get_affine_transform(
            c, s, rot, [self.opts.input_res, self.opts.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opts.input_res, self.opts.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opts.no_color_aug:
            color_aug(self.data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        # change data layout to [3, input_res, input_res]
        inp = inp.transpose(2, 0, 1)

        # output_res is max(output_h, output_w): the size after down-sampling
        output_res = self.opts.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)

        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, 2 * num_joints), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opts.mse_loss else \
            draw_umich_gaussian

        # Annotations of the cropped image live under 'bbox'/'keypoints',
        # those of the original image under 'org_bbox'/'org_keypoints'.
        bbox_key = 'bbox' if cropped else 'org_bbox'
        kps_key = 'keypoints' if cropped else 'org_keypoints'

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # Float copy so transformed coordinates are never truncated to
            # integers and the stored annotation is left untouched.
            bbox = np.array(ann[bbox_key], dtype=np.float32)
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann[kps_key], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for joint_idx in self.flip_idx:
                    # Swap left/right joints; copy first because the rows are
                    # views into the same array.
                    pts[joint_idx[0]], pts[joint_idx[1]] = \
                        pts[joint_idx[1]].copy(), pts[joint_idx[0]].copy()
            if rotted:
                # Axis-aligned box around the rotated *visible* joints.
                # Invisible joints must not take part: they would contribute
                # a (0, 0) point and drag the box to the image origin.
                visible = pts[:, 2] > 0
                if visible.any():
                    src_pts = pts[visible, :2]
                else:
                    # No visible joint: fall back to the rotated box corners.
                    x1, y1, x2, y2 = bbox
                    src_pts = np.array(
                        [[x1, y1], [x1, y2], [x2, y1], [x2, y2]],
                        dtype=np.float32)
                pts_rot = np.array([
                    affine_transform(p, trans_output_rot) for p in src_pts
                ])
                bbox[:2] = np.min(pts_rot, axis=0)
                bbox[2:] = np.max(pts_rot, axis=0)
            else:
                bbox[:2] = affine_transform(bbox[:2], trans_output)
                bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int  # sub-pixel error of the centre [x, y]
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()  # sum of the visibility flags
                if num_kpts == 0:
                    # Object without any annotated joint: mark the centre on
                    # the heat-map but exclude it from the regression losses.
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)),
                                            min_overlap=1)
                hp_radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                    0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] <= 0:  # joint is not visible
                        continue
                    pts[j, :2] = affine_transform(pts[j, :2],
                                                  trans_output_rot)
                    inside = (0 <= pts[j, 0] < output_res
                              and 0 <= pts[j, 1] < output_res)
                    if not inside:
                        continue
                    kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                    kps_mask[k, j * 2:j * 2 + 2] = 1
                    pt_int = pts[j, :2].astype(np.int32)
                    hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                    hp_ind[k * num_joints + j] = \
                        pt_int[1] * output_res + pt_int[0]
                    hp_mask[k * num_joints + j] = 1
                    if self.opts.dense_hp:
                        # must run before the centre gaussian is drawn on hm
                        draw_dense_reg(dense_kps[j],
                                       hm[cls_id],
                                       ct_int,
                                       pts[j, :2] - ct_int,
                                       radius,
                                       is_offset=True)
                        draw_gaussian(dense_kps_mask[j], ct_int, radius)
                    draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # gt_det row: x0, y0, x1, y1, 1, joint_1 x, joint_1 y, ...,
                # joint_n x, joint_n y, cls_id
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }

        if self.opts.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            # Duplicate the mask so it lines up with the (x, y) channel pairs.
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opts.reg_offset:
            ret.update({'reg': reg})
        if self.opts.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opts.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opts.debug > 0 or not self.split == 'train':
            # The empty placeholder has the same width as a real row:
            # 4 box coords + score + 2 * num_joints + class id.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, num_joints * 2 + 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #9
0
    def __getitem__(self, index):
        """Build one detection sample with an extra per-object angle target.

        Reads the image and annotations of ``self.images[index]``, warps the
        image to the network input size and renders the targets on the
        down-sampled output grid. Geometric augmentation (random crop, scale,
        shift and flip) is intentionally not applied here; only colour
        jitter is used during training.

        Each annotation ``bbox`` is read as ``[cx, cy, w, h, angle]``: the
        first two entries become the centre, entries 2-3 the size and entry
        4 the angle target.

        Args:
            index: Position of the sample inside ``self.images``.

        Returns:
            dict with the keys ``input``, ``hm``, ``reg_mask``, ``ind``,
            ``angle`` and ``wh``. ``wh`` is replaced by
            ``dense_wh``/``dense_wh_mask`` when ``self.opt.dense_wh`` is set,
            or by ``cat_spec_wh``/``cat_spec_mask`` when
            ``self.opt.cat_spec_wh`` is set. ``reg`` and ``meta`` are added
            depending on the options.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Cannot read image: {}'.format(img_path))

        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        if self.opt.keep_res:
            # Round each side up to the next multiple of (pad + 1).
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(height, width) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        # angle regression target
        reg_angle = np.zeros((self.max_objs, 1), dtype=np.float32)

        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # Work on a copy: ``ann['bbox']`` is the object held by the
            # annotation store, and transforming it in place would corrupt
            # the box the next time this image is requested.
            bbox = np.array(ann['bbox'], dtype=np.float32)
            cls_id = int(self.cat_ids[ann['category_id']])

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            # NOTE(review): (w, h) goes through the same affine map as a
            # point. If affine_transform applies the translation term, the
            # size is offset whenever image and output aspect ratios differ
            # -- confirm against the decoding code before changing this.
            bbox[2:4] = affine_transform(bbox[2:4], trans_output)
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)
            h, w = bbox[3], bbox[2]
            if h > 0 and w > 0:
                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                reg_angle[k] = bbox[4]

                # Always compute the radius: draw_dense_reg below needs it
                # even when the heat-map itself is drawn as an ellipse.
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                if self.opt.ellipse:
                    draw_ellipse_gaussian(hm[cls_id], ct_int, w, h,
                                          reg_angle[k])
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h

                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if self.opt.ellipse:
            # Zero out the low-valued tail of the heat-map.
            hm = np.where(hm > 1e-2, hm, 0)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'angle': reg_angle
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #10
0
    def __getitem__(self, index):
        """Return ``(image, targets)`` for the sample at ``index``.

        The image is read with OpenCV and converted from BGR to RGB.  A box is
        taken from ``<box_root>/<image stem>.txt`` when that file exists (four
        numbers unpacked as centre-x, centre-y, width, height and converted to
        corner format); otherwise the default box ``[0, 0, 1, 1]`` is used.
        Boxes are presumably normalised to [0, 1] (per the original comment) and
        are scaled by ``configs.grid_wh`` when targets are drawn.

        Returns:
            tuple: the (optionally transformed) image and a dict with keys
            ``hm``, ``wh``, ``xy``, ``ind``, ``dense_xy``, ``dense_wh``,
            ``dense_mask`` and ``boxes``.
        """
        img_file = self.flist[index]
        image = cv2.cvtColor(cv2.imread(img_file), cv2.COLOR_BGR2RGB)

        label_file = str(Path(self.box_root) / (Path(img_file).stem + '.txt'))
        if not osp.exists(label_file):
            # No annotation file: fall back to the default box.
            boxes = np.array([[0.0, 0.0, 1.0, 1.0]]).astype('float32')
        else:
            xx, yy, ww, hh = np.loadtxt(label_file)
            # centre/size -> corner coordinates
            left, top = xx - ww / 2, yy - hh / 2
            right, bottom = xx + ww / 2, yy + hh / 2
            boxes = np.array([[left, top, right, bottom]]).astype('float32')

        if self.transform:
            image, boxes = self.transform(image, boxes)

        # Build the ground-truth maps consumed by the loss.
        cfg = self.configs
        output_h, output_w, grid_wh = cfg.hh, cfg.ww, cfg.grid_wh
        _in_h, _in_w = cfg.image_size  # unpacked as before; not used below

        map_shape = (2, output_h, output_w)
        hm = np.zeros((cfg.num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros(map_shape, dtype=np.float32)
        dense_xy = np.zeros(map_shape, dtype=np.float32)
        reg = np.zeros((cfg.max_objs, 2), dtype=np.float32)
        ind = np.zeros((cfg.max_objs), dtype=np.int64)
        reg_mask = np.zeros((cfg.max_objs), dtype=np.uint8)

        num_objs = min(boxes.shape[0], cfg.max_objs)
        for k in range(num_objs):
            box = boxes[k]
            box_h, box_w = box[3] - box[1], box[2] - box[0]
            if not (box_h > 0 and box_w > 0):
                continue  # degenerate box: leaves all targets at zero

            radius = gaussian_radius(
                (math.ceil(box_h * grid_wh), math.ceil(box_w * grid_wh)))
            radius = max(0, int(radius))

            # Box centre expressed in grid cells, then its integer cell index.
            center = np.array(
                [(box[0] + box[2]) / 2.0 * grid_wh,
                 (box[1] + box[3]) / 2.0 * grid_wh],
                dtype=np.float32)
            center_int = np.clip(center.astype(np.int32), 0, grid_wh - 1)

            # NOTE: the heatmap channel is selected by object index k.
            draw_umich_gaussian(hm[k], center_int, radius)

            wh[k] = 1. * box_w, 1. * box_h
            ind[k] = center_int[1] * output_w + center_int[0]
            reg[k] = center - center_int  # sub-cell offset
            reg_mask[k] = 1

            draw_dense_reg(dense_wh, hm.max(axis=0), center_int, wh[k], radius)
            draw_dense_reg(dense_xy, hm.max(axis=0), center_int, reg[k], radius)

        # The per-pixel maximum over channels is duplicated to mask both
        # channels of the dense regression maps.
        peak = hm.max(axis=0, keepdims=True)
        dense_mask = np.concatenate([peak, peak], axis=0)

        ret = {
            'hm': hm,
            'wh': wh,
            'xy': reg,
            'ind': ind,
            'dense_xy': dense_xy,
            'dense_wh': dense_wh,
            'dense_mask': dense_mask,
            'boxes': boxes,
        }
        return image, ret
Example #11
0
    def __getitem__(self, index):
        """Build one sample with Fourier-encoded contour targets.

        Loads the image and its non-crowd COCO annotations, resamples every
        instance contour to ``self.n_vertices`` points, applies random
        scale/crop/flip augmentation (train split only) and rasterises the
        targets on the feature-map grid.

        Args:
            index: position of the sample in ``self.images``.

        Returns:
            dict with keys ``image`` (normalised, channel-first), ``real`` and
            ``imaginary`` (per-object FFT components of the centred contour),
            ``hmap``, ``w_h_`` ([top, bottom, left, right] distances from the
            contour's mass centre to the box), ``regs``, ``inds``,
            ``ind_masks``, ``c``, ``s`` and ``img_id``.
        """
        img_id = self.images[index]
        # Single lookup of the image record (file name and original size).
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = get_connected_polygon_using_mask(anno['segmentation'], (h_img, w_img),
                                                        n_vertices=self.n_vertices, closing_max_kernel=50)

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Resample the contour to a fixed number of vertices.
            if len(contour) > self.n_vertices:
                fixed_contour = resample(contour, num=self.n_vertices)
            else:
                fixed_contour = turning_angle_resample(contour, self.n_vertices)

            # Keep the resampled contour inside the annotated box.
            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # Reject degenerate or non-finite shapes.  NaN never compares equal
            # to anything, so it has to be detected with isfinite/isnan.
            if contour_std < 1e-6 or not np.isfinite(contour_std):
                continue

            updated_bbox = [np.min(fixed_contour[:, 0]), np.min(fixed_contour[:, 1]),
                            np.max(fixed_contour[:, 0]), np.max(fixed_contour[:, 1])]

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(updated_bbox)

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)  # in xyxy format
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Placeholder object; its zero-sized box is skipped below.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 4), dtype=np.float32)  # [top, bottom, left, right] distances
        real_ = torch.FloatTensor(np.zeros((self.max_objs, self.n_vertices), dtype=np.float32))
        imaginary_ = torch.FloatTensor(np.zeros((self.max_objs, self.n_vertices), dtype=np.float32))
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # sub-pixel offset of the mass centre
        inds = np.zeros((self.max_objs,), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if k >= self.max_objs:
                # Target buffers hold max_objs rows; extra instances are
                # dropped instead of raising IndexError.
                break

            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # Skip boxes that collapse (or become NaN) on the feature map
            # before doing any per-vertex work.
            if not (h >= 1e-6 and w >= 1e-6):
                continue

            # Apply the same scale/crop transform to every contour vertex.
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

            # Normalise the vertex ordering: reverse it when the orientation
            # check fails, then start from the left-most vertex (argmin x).
            if check_clockwise_polygon(shape_clipped):
                fixed_contour = shape_clipped.copy()
            else:
                fixed_contour = np.flip(shape_clipped, axis=0)
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            centered_shape = indexed_shape - mass_center

            obj_c = mass_center
            obj_c_int = obj_c.astype(np.int32)

            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            # NOTE(review): torch.fft(x, signal_ndim=...) is the legacy
            # functional API; newer PyTorch releases replace it with the
            # torch.fft module - confirm against the pinned version.
            # The 32. divisor presumably equals n_vertices - TODO confirm.
            fourier_results = torch.fft(torch.FloatTensor(centered_shape), signal_ndim=1) / 32.
            real_[k] = fourier_results[:, 0]
            imaginary_[k] = fourier_results[:, 1]
            w_h_[k] = mass_center[1] - bbox[1], bbox[3] - mass_center[1], \
                      mass_center[0] - bbox[0], bbox[2] - mass_center[0]  # [top, bottom, left, right] distance
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

        return {'image': img, 'real': real_.numpy(), 'imaginary': imaginary_.numpy(),
                'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
                'c': center, 's': scale, 'img_id': img_id}
Example #12
0
  def __getitem__(self, index):
    """Build one multi-frame detection sample with a segmentation map.

    Stacks ``opt.nbr_frames`` neighbouring frames along the channel axis,
    loads a pre-computed background-subtraction image, applies the usual
    random crop/scale/flip augmentation (train split only) and draws the
    CenterNet targets for the annotated frame.

    Args:
      index: position of the sample in ``self.images``.

    Returns:
      dict with ``input`` (channel-first, ``3 * nbr_frames`` channels),
      ``hm``, ``reg_mask``, ``ind``, ``seg`` and, depending on the options,
      ``wh`` / ``dense_wh`` + ``dense_wh_mask`` / ``cat_spec_wh`` +
      ``cat_spec_mask``, ``reg`` and ``meta``.
    """
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)

    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)

    num_objs = min(len(anns), self.max_objs)

    channel_counter = 1  # len(self.coco.getCatIds())

    # ---- collect the neighbouring frames --------------------------------
    N_FRAMES = self.opt.nbr_frames
    middle = int(N_FRAMES / 2)
    img_dir = os.path.dirname(img_path)
    # Zero-padded frame number embedded in the file name, e.g. 'img00012.jpg'.
    frame_str = os.path.basename(img_path).replace('.jpg', '').replace('img', '').replace('.JPEG', '')
    rest = img_path.replace(frame_str + '.jpg', '').replace(img_dir, '')
    length = len(frame_str)
    modulo = 10 ** length  # keeps the frame number within `length` digits
    frame_no = int(frame_str)

    img_paths = []
    for i in range(N_FRAMES):
      neighbour = str((frame_no - (i - middle)) % modulo).zfill(length)
      new_img_path = img_dir + rest + neighbour + '.jpg'
      if not os.path.exists(new_img_path):
        # Missing neighbour: reuse the annotated frame.
        new_img_path = img_path
      img_paths.append(new_img_path)

    imgs = [cv2.imread(path) for path in img_paths]
    img = np.concatenate(imgs, -1)

    # Integer xyxy boxes grouped by category id (as a string key).
    bboxes = {}
    for ann in anns:
      box = ann['bbox']
      bboxes.setdefault(str(ann['category_id']), []).append(
        [int(box[0]), int(box[1]), int(box[0] + box[2]), int(box[1] + box[3])])

    # ---- segmentation / background-subtraction image --------------------
    PYFLOW = True
    if PYFLOW:
      if 'uav' in self.opt.dataset:
        seg_root = '/store/datasets/UAV/bgsubs'
      else:
        seg_root = '/store/datasets/OlderUA-Detrac/pyflow-bgsubs'
      seg_path = os.path.join(seg_root,
                              os.path.dirname(file_name).split('/')[-1],
                              os.path.basename(file_name).replace('jpg', 'png'))
      seg_img = cv2.imread(seg_path, 0)  # flag 0: read as single-channel
    else:
      # Fallback: paint the annotated boxes into a per-class mask.
      seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]])
      for label in range(1, channel_counter + 1):
        if str(label) in bboxes:
          for bbox in bboxes[str(label)]:
            seg_img[label - 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255

    # ---- augmentation parameters ----------------------------------------
    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift
        # The shift uses the un-jittered scale, so it is applied first.
        c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True
        seg_img = seg_img[:, ::-1]
        img = img[:, ::-1, :]
        c[0] = width - c[0] - 1

    trans_input = get_affine_transform(
      c, s, 0, [input_w, input_h])

    seg_inp = cv2.warpAffine(seg_img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

    # warpAffine takes dsize as (width, height) but returns an array shaped
    # (height, width, channels); the buffer must therefore be height-first or
    # the assignment below fails for non-square inputs.
    inp = np.zeros((input_h, input_w, N_FRAMES * 3))
    for i in range(N_FRAMES):
      inp[:, :, i*3:i*3+3] = cv2.warpAffine(img[:, :, i*3:i*3+3], trans_input,
                                            (input_w, input_h),
                                            flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    seg_inp = (seg_inp.astype(np.float32) / 255.)

    # Colour augmentation is applied to each 3-channel frame separately.
    if self.split == 'train' and not self.opt.no_color_aug:
      for i in range(N_FRAMES):
        color_aug(self._data_rng, inp[:, :, i*3:i*3+3], self._eig_val, self._eig_vec)

    # Per-frame normalisation with the mean/std stored on the dataset.
    for i in range(N_FRAMES):
      inp[:, :, i*3:i*3+3] = (inp[:, :, i*3:i*3+3] - self.mean) / self.std

    inp = inp.transpose(2, 0, 1)  # HWC -> CHW

    # ---- detection targets ----------------------------------------------
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      if h > 0 and w > 0:
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        ct = np.array(
          [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]  # flattened centre index
        reg[k] = ct - ct_int  # sub-pixel offset lost by the int cast
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    seg_inp = np.expand_dims(seg_inp, 0)  # add a leading channel axis
    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'seg': seg_inp}
    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta

    return ret
Example #13
0
    def __getitem__(self, index):
        """Build one CenterNet sample (image plus heatmap/size/offset targets).

        Args:
            index: position of the sample in ``self.images``.

        Returns:
            dict with keys ``image`` (normalised, channel-first), ``hmap``,
            ``w_h_``, ``regs``, ``inds``, ``ind_masks``, ``c`` (crop centre),
            ``s`` (crop scale) and ``img_id``.
        """
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        labels = np.array(
            [self.cat_ids[anno['category_id']] for anno in annotations])
        # One row per annotated box, in COCO xywh order, e.g.
        #   [[359.17 146.17 112.45 213.57]
        #    [339.88  22.16 153.88 300.73]]
        bboxes = np.array([anno['bbox'] for anno in annotations],
                          dtype=np.float32)
        if len(bboxes) == 0:
            # Placeholder object; its zero-sized box is skipped below.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # Indexing reminder: bbox[[0, 2]] selects (x1, x2) and bbox[0:2]
        # selects (x1, y1).  Each bbox is a row view, so the in-place edits
        # below modify `bboxes` itself.
        for k, (bbox, label) in enumerate(zip(bboxes, labels)):
            if k >= self.max_objs:
                # Target buffers hold max_objs rows; extra annotations are
                # dropped instead of raising IndexError.
                break
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                # Flattened index of the centre cell on the feature map.
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
Example #14
0
    def __getitem__(self, index):
        """Build the network input and ground-truth targets for one image.

        The image ``self.images[index]`` is read from ``self.img_dir``, its box
        annotations come from ``self.coco`` and its keypoints from a
        ``<image stem>_kps.npy`` file.  During training a random crop/scale
        (or shift/scale) and an optional rotation are applied.  Targets are
        rendered at ``input size // opt.down_ratio``.

        Args:
            index: position in ``self.images``.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask`` and ``ind``; ``wh`` unless
            ``opt.dense_wh`` / ``opt.cat_spec_wh`` replace it; ``hps`` and
            ``hps_mask`` unless ``opt.dense_hp`` replaces them; plus ``reg``,
            ``hm_hp``, ``hp_offset`` / ``hp_ind`` / ``hp_mask`` and ``meta``
            when the corresponding options are enabled.

        Raises:
            FileNotFoundError: if the image file cannot be decoded.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(
                'cannot read image: {}'.format(img_path))

        # Load the keypoint annotation file that matches the image file name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        kps_path = os.path.join(
            '/media/srt/dataset/L_Shelf_0114/Kps_Ann', stem + '_kps.npy'
        )  #/media/srt/resource/Halcon_Project/L_shelf_dataset/HG_Dataset/kps
        kps_raw = np.load(kps_path)
        # Append a column of ones so the keypoint array becomes [6, 3]; the
        # third column is what the `pts[j, 2] > 0` test below reads.
        kps_ann = np.column_stack((kps_raw, np.ones(6)))

        # Initial crop centre (image centre) and scale (longer image side).
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.opt.input_h, self.opt.input_w  # resolution defined in opt
        rot = 0

        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                # The shift uses the scale from before it is jittered.
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # Random rotation, taken from the multi-pose sampler.
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
            # Horizontal flip augmentation is not applied in this sampler.

        # Apply the affine transform to the input image.
        trans_input = get_affine_transform(c, s, rot, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        num_kps = 6  # open question from the original author: should this be +1?

        # Boxes use the unrotated transform; keypoints use the rotated one so
        # that they stay aligned with the rotated input image.
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w),
                      dtype=np.float32)  # heatmap of object centres
        hm_hp = np.zeros((num_kps, output_h, output_w),
                         dtype=np.float32)  # heatmap of keypoints
        # The arrays below are only initialised here; values are filled later.
        dense_kps = np.zeros((num_kps, 2, output_h, output_w),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_kps, output_h, output_w),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        kps = np.zeros((num_kps, num_kps * 2),
                       dtype=np.float32)  # vectors from the other keypoints to a given keypoint
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_kps * 2), dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_kps, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_kps), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_kps), dtype=np.int64)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            # Keypoints come from the .npy file instead of the COCO json; the
            # same per-image array is used for every object.
            pts = np.array(kps_ann, np.float32).reshape(num_kps, 3)

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                    if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_kps):
                    if pts[j, 2] > 0:
                        # Only keypoints whose third value is > 0 are mapped
                        # into output coordinates.
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_w and \
                                pts[j, 1] >= 0 and pts[j, 1] < output_h:
                            # Vectors from the other keypoints to this one.
                            # NOTE(review): the right-hand side has shape
                            # (num_kps, 2) while the target slice holds two
                            # values, which numpy cannot broadcast; keypoints
                            # with index > j are also still untransformed at
                            # this point.  The intended layout of `kps` must
                            # be confirmed before this line can be repaired.
                            kps[j, j * 2:j * 2 + 2] = pts[:, :2] - pts[j, :2]
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_kps + j] = pts[j, :2] - pt_int
                            # Flattened output-map index of the keypoint.
                            hp_ind[k * num_kps + j] = \
                                pt_int[1] * output_w + pt_int[0]
                            hp_mask[k * num_kps + j] = 1
                            if self.opt.dense_hp:
                                # Must be drawn before the centre gaussian is
                                # added to hm.
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                # Draw the gaussian for the object centre.
                draw_gaussian(hm[cls_id], ct_int, radius)

                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_kps * 2).tolist() + [cls_id])

        # 'hps' and 'hps_mask' are added on top of the plain detection targets.
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_kps * 2, output_h, output_w)
            dense_kps_mask = dense_kps_mask.reshape(num_kps, 1, output_h,
                                                    output_w)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_kps * 2, output_h,
                                                    output_w)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            # Each gt_det row is 4 box values + score + 2 * num_kps keypoint
            # coordinates + class id, so the empty fallback uses that width.
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6 + num_kps * 2), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #15
0
    def _get_data(self, position):
        """Load, augment and encode the sample stored at ``position``.

        ``position`` is looked up through ``self._indexes`` to pick an image
        id.  The image is warped to the network input size (with random
        crop/shift, scale and horizontal flip while ``self.split == 'train'``)
        and every kept annotation is rendered into detection targets.

        Returns:
            tuple ``(inp, hm, ind, wh, reg, reg_mask, cls)``.
        """
        opt, params = self.opt, self.params

        image_id = self.images[self._indexes[position]]
        image_file = self.coco.loadImgs(ids=[image_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, image_file)
        annotations = self.coco.loadAnns(
            ids=self.coco.getAnnIds(imgIds=[image_id]))
        num_objs = min(len(annotations), params.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        if opt.keep_res:
            # Round each side up to the next size compatible with opt.pad.
            input_h = (height | opt.pad) + 1
            input_w = (width | opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(height, width) * 1.0
            input_h, input_w = opt.input_h, opt.input_w

        flipped = False
        if self.split == 'train':
            if opt.not_rand_crop:
                # Gaussian shift and scale jitter; the shift is drawn first
                # and uses the scale from before it is jittered.
                sf, cf = opt.scale, opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            else:
                # Random scale followed by a random crop centre.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, width)
                h_border = self._get_border(128, height)
                c[0] = np.random.randint(low=w_border, high=width - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=height - h_border)

            if np.random.random() < opt.flip:
                flipped = True
                assert (
                    len(img.shape) == 3
                ), f"The dimensions of img should be 3. Filename: {img_path}, shape: {img.shape}"
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # Warp to the input resolution, then normalise.
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = inp.astype(np.float32) / 255.
        if self.split == 'train' and not opt.no_color_aug:
            color_aug(self._rng, inp, params._eig_val, params._eig_vec)
        inp = (inp - params.mean) / params.std

        if self.mixed_precision:
            inp = fast_pad(inp)
        if not self.channel_last:
            # Channels-first layout unless channel_last is enabled.
            inp = inp.transpose(2, 0, 1)

        output_h = input_h // opt.down_ratio
        output_w = input_w // opt.down_ratio
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        max_objs = params.max_objs
        hm = np.zeros((params.num_classes, output_h, output_w),
                      dtype=np.float32)
        ind = np.zeros((max_objs), dtype=np.int32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        reg_mask = np.zeros((max_objs, 1), dtype=np.float32)
        cls = np.zeros((max_objs, 1), dtype=np.int32)

        for k, ann in enumerate(annotations[:num_objs]):
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(params.cat_ids[ann['category_id']])
            if flipped:
                # Mirror the box horizontally, swapping left and right edges.
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h = bbox[3] - bbox[1]
            w = bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                # Box has no extent after clipping; its slot stays zeroed.
                continue

            radius = max(0, int(gaussian_radius((math.ceil(h),
                                                 math.ceil(w)))))
            ct = np.array([(bbox[0] + bbox[2]) / 2,
                           (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_umich_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            cls[k] = cls_id

        if self.channel_last:
            # Move the class axis last when channel_last is enabled.
            hm = np.transpose(hm, (1, 2, 0))
        return (inp, hm, ind, wh, reg, reg_mask, cls)
Example #16
0
    def __getitem__(self, index):
        """Build the image tensor and contour-based targets for one image.

        For every non-crowd annotation the segmentation is reduced to a single
        contour, resampled to ``self.n_vertices`` vertices and clipped to the
        annotation box.  The image is then cropped/scaled (and randomly
        flipped while ``self.split == 'train'``), normalised, and each object
        is encoded at feature-map resolution.

        Args:
            index: position in ``self.images``.

        Returns:
            dict with keys ``image``, ``codes``, ``hmap``, ``w_h_std``,
            ``regs``, ``inds``, ``ind_masks``, ``c``, ``s`` and ``img_id``.

        Raises:
            FileNotFoundError: if the image file cannot be decoded.
        """
        img_id = self.images[index]
        # Query the image record once; it supplies both the path and the size.
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['iscrowd'] == 1:  # Excludes crowd objects
                continue

            polygons = anno['segmentation']
            if len(polygons) > 1:
                # Several polygon parts: rasterise them all, then close the
                # mask with a growing kernel until one contour remains.
                bg = np.zeros((h_img, w_img, 1), dtype=np.uint8)
                for poly in polygons:
                    n_pts = len(poly) // 2
                    vertices = np.asarray(poly[:2 * n_pts]).reshape(
                        1, n_pts, 2).astype(np.int32)
                    cv2.drawContours(bg,
                                     vertices,
                                     color=(255),
                                     contourIdx=-1,
                                     thickness=-1)

                pads = 5
                while True:
                    kernel = np.ones((pads, pads), np.uint8)
                    bg_closed = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel)
                    obj_contours, _ = cv2.findContours(bg_closed,
                                                       cv2.RETR_TREE,
                                                       cv2.CHAIN_APPROX_SIMPLE)
                    if len(obj_contours) > 1:
                        pads += 5
                    else:
                        polygons = obj_contours[0]
                        break
            else:
                polygons = anno['segmentation'][0]

            gt_x1, gt_y1, gt_w, gt_h = anno['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Resample the contour to a fixed number of vertices
            fixed_contour = resample(contour, num=self.n_vertices)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1,
                                          gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1,
                                          gt_y1 + gt_h)
            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0)**2))
            # Reject degenerate shapes.  NaN must be tested with isfinite:
            # an `== np.nan` comparison is always False.
            if contour_std < 1e-6 or not np.isfinite(contour_std):
                continue

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # No usable object: fall back to a single all-zero placeholder.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(
                'cannot read image: {}'.format(img_path))
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        fmap_w, fmap_h = self.fmap_size['w'], self.fmap_size['h']
        trans_fmap = get_affine_transform(center, scale, 0, [fmap_w, fmap_h])

        hmap = np.zeros((self.num_classes, fmap_h, fmap_w),
                        dtype=np.float32)  # heatmap
        w_h_std = np.zeros((self.max_objs, 2),
                           dtype=np.float32)  # per-axis std of the contour
        codes_ = np.zeros((self.max_objs, self.n_codes),
                          dtype=np.float32)  # gt coefficients/codes for shapes
        regs = np.zeros(
            (self.max_objs, 2),
            dtype=np.float32)  # regression for offsets of shape center
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        for k, (bbox, label, shape) in enumerate(zip(bboxes, labels, shapes)):
            if k >= self.max_objs:
                # The target arrays only have max_objs slots; writing past
                # them would raise IndexError, so extra objects are dropped.
                break

            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour: mirror every x coordinate.
                shape[0::2] = width - shape[0::2] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, fmap_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, fmap_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # apply scale and crop transform to every contour vertex
            for m in range(self.n_vertices):
                shape[2 * m:2 * m + 2] = affine_transform(
                    shape[2 * m:2 * m + 2], trans_fmap)

            contour = np.reshape(shape, (self.n_vertices, 2))
            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(contour[:, 0])
            indexed_shape = np.concatenate(
                (contour[idx:, :], contour[:idx, :]), axis=0)

            # Reverse the vertex order unless check_clockwise_polygon
            # already reports the contour as clockwise.
            if check_clockwise_polygon(indexed_shape):
                fixed_contour = indexed_shape.copy()
            else:
                fixed_contour = np.flip(indexed_shape, axis=0)

            contour[:, 0] = np.clip(fixed_contour[:, 0], 0, fmap_w - 1)
            contour[:, 1] = np.clip(fixed_contour[:, 1], 0, fmap_h - 1)

            contour_mean = np.mean(contour, axis=0)
            contour_std = np.std(contour, axis=0)
            std_norm = np.sqrt(np.sum(contour_std**2))
            if not std_norm > 1e-6:
                # Degenerate (or NaN) contour: leave slot k empty.
                continue
            norm_shape = (contour - contour_mean) / std_norm

            if h > 0 and w > 0:
                obj_c = contour_mean
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_std[k] = contour_std
                temp_codes, _ = fast_ista(norm_shape.reshape((1, -1)),
                                          self.dictionary,
                                          lmbda=self.sparse_alpha,
                                          max_iter=80)
                codes_[k] = np.exp(temp_codes)
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * fmap_w + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'codes': codes_,
            'hmap': hmap,
            'w_h_std': w_h_std,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
Example #17
0
    def __next__(self):
        load_vid_time, img_transform_time, create_heatmap_time = 0, 0, 0
        start = time.time()
        if self.cap is None or self.count >= self.length:
            if self.cap is not None and self.vid_i == self.num_videos and self.loop:
                self.vid_i = 0
            elif self.cap is not None and self.vid_i == self.num_videos:
                raise StopIteration
            if self.opt.vidstream == 'skvideo':
                self.cap = skvideo.io.vread(self.video_paths[self.vid_i])
                metadata = skvideo.io.ffprobe(self.video_paths[self.vid_i])
                fr_lst = metadata['video']['@avg_frame_rate'].split('/')
                self.rate = int(fr_lst[0]) / int(fr_lst[1])
                self.length = int(metadata['video']['@nb_frames'])
            else:
                self.cap = cv2.VideoCapture(self.video_paths[self.vid_i])
                width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                self.rate = self.cap.get(cv2.CAP_PROP_FPS)
                self.length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
                self.frame_gen = self._frame_from_video(self.cap)

            # self.detections = pickle.load(open(self.annotation_path[self.vid_i], 'rb'))
            self.count = 0
            self.vid_i += 1
        end_load_vid = time.time()
        load_vid_time = end_load_vid - start

        # load image depending on stream
        start_resize = time.time()
        if self.opt.vidstream == 'skvideo':
            img = self.cap[self.count]
        else:
            img = next(self.frame_gen)
            # in_h = int(original_img.shape[0] / self.opt.downsample)
            # in_w = int(original_img.shape[1] / self.opt.downsample)
            # img = cv2.resize(original_img, (in_w, in_h))
            # cv2.imwrite("/home/jl5/CenterNet/tmp.png", img)

        start_img_transform = time.time()
        anns = self.mmdetect_pred2inst(self.count)
        num_objs = min(len(anns), self.max_objs)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # send to gpu
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = torch.from_numpy(inp).cuda()
        inp = (inp.float() / 255.)

        # if self.split == 'train' and not self.opt.no_color_aug:
        #   color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - torch.from_numpy(self.mean).cuda()) / torch.from_numpy(
            self.std).cuda()
        inp = inp.permute(2, 0, 1)

        end_img_transform = time.time()
        img_transform_time = end_img_transform - start_img_transform

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)
        unconfident_hm = np.zeros((num_classes, output_h, output_w),
                                  dtype=np.float32)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []

        def show_bbox(im):
            fig, ax = plt.subplots(1)
            ax.imshow(im)
            for i in range(num_objs):
                bbox = np.array(anns[i]['bbox'], dtype=np.int32)
                bbox = bbox / self.opt.downsample
                rect = patches.Rectangle((bbox[0], bbox[1]),
                                         bbox[2] - bbox[0],
                                         bbox[3] - bbox[1],
                                         linewidth=1,
                                         edgecolor='r',
                                         facecolor='none')
                ax.add_patch(rect)
            plt.savefig('/home/jl5/CenterNet/tmp.png')
            pdb.set_trace()

        # detect = self.detections[self.count]
        # if self.opt.task == 'ctdet_semseg':
        #   seg_mask, weight_mask = batch_segmentation_masks(1, (720, 1280), np.array([detect['boxes']]), np.array([detect['classes']]), detect['masks'],
        #       np.array([detect['scores']]), [len(detect['boxes'])], True, coco_class_groups, mask_threshold=0.5, box_threshold=self.opt.center_thresh, scale_boxes=False)
        #   unbatch_seg = seg_mask[0].astype(np.uint8)
        #   unbatch_weight = weight_mask[0].astype(np.uint8)
        #   seg_mask = np.expand_dims(cv2.resize(unbatch_seg, (1280, 736)), axis=0).astype(np.int32)
        #   weight_mask = np.expand_dims(cv2.resize(unbatch_weight, (1280, 736)), axis = 0).astype(bool)

        start_detect = time.time()

        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(
                ann['bbox'],
                dtype=np.float32)  # self._coco_box_to_bbox(ann['bbox'])
            # bbox = bbox / self.opt.downsample # if need to downsample
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if ann['score'] >= 0.3 and ann['score'] < 0.5:
                    draw_gaussian(unconfident_hm[cls_id], ct_int, radius)
                    reg_mask[k] = 0
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                    reg_mask[k] = 1
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int

                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if self.opt.task == 'ctdet_semseg':
            ret = {
                'input': inp,
                'hm': hm,
                'reg_mask': reg_mask,
                'ind': ind,
                'wh': wh,
                'seg': seg_mask,
                'weight_seg': weight_mask
            }
        else:
            ret = {
                'input': inp,
                'hm': hm,
                'reg_mask': reg_mask,
                'ind': ind,
                'wh': wh,
                'unconf_hm': unconfident_hm
            }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                    np.zeros((1, 6), dtype=np.float32)
            # meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': self.count}
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': self.count}
            ret['meta'] = meta
        self.count += 1

        end_detect_time = time.time()
        create_heatmap_time = end_detect_time - start_detect
        # print("load vid {:.4f} | img transform {:.4f} | create instance {:.4f} \n".format(load_vid_time, img_transform_time, create_heatmap_time))
        return ret
    def __getitem__(self, index):
        """Assemble the network input and ground-truth targets for one image.

        The image record and its annotations are fetched through ``self.coco``.
        Every annotation whose category is in ``KINS_IDS`` has its segmentation
        converted to a contour of exactly ``self.n_vertices`` vertices, clipped to
        the annotation's ``a_bbox``.  When ``self.split == 'train'`` a random
        scale, a random crop centre and (with probability 0.5) a horizontal flip
        are applied, followed by colour augmentation.  Targets are then drawn on
        the ``self.fmap_size`` grid.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with the keys
              * ``image``: normalised image, transposed to [C, H, W].
              * ``shapes``: ``(max_objs, n_vertices * 2)`` contours, re-ordered to
                start at the left-most vertex and centred on their mean vertex.
              * ``codes``: ``(max_objs, n_codes)`` output of ``fast_ista`` for each
                centred contour against ``self.dictionary``.
              * ``offsets``: mean contour vertex minus bounding-box centre.
              * ``occ_map``: ``(1, H, W)`` sum of per-object polygon masks, clipped
                to ``self.max_occ`` and divided by it.
              * ``hmap``: per-class centre heatmap.
              * ``w_h_``: box width/height on the feature map.
              * ``regs``: fractional part lost when the centre is cast to int.
              * ``inds``: flattened feature-map index of each integer centre.
              * ``ind_masks``: 1 for every row that holds a real object.
              * ``c``, ``s``: crop centre and scale used for the affine transform.
              * ``img_id``: the image id taken from ``self.images``.
        """
        img_id = self.images[index]
        # One lookup serves both the file name and the image dimensions.
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])

        labels = []
        bboxes = []
        shapes = []

        for anno in annotations:
            if anno['category_id'] not in KINS_IDS:
                continue  # excludes 3: person-sitting class for evaluation

            polygons = get_connected_polygon_using_mask(anno['segmentation'], (h_img, w_img),
                                                        n_vertices=self.n_vertices, closing_max_kernel=50)

            gt_x1, gt_y1, gt_w, gt_h = anno['a_bbox']  # this is used to clip resampled polygons
            contour = np.array(polygons).reshape((-1, 2))

            # Bring the contour to exactly n_vertices points.
            if len(contour) > self.n_vertices:
                fixed_contour = resample(contour, num=self.n_vertices)
            else:
                fixed_contour = turning_angle_resample(contour, self.n_vertices)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            contour_std = np.sqrt(np.sum(np.std(fixed_contour, axis=0) ** 2))
            # `x == np.nan` is always False, so NaN has to be detected with
            # np.isfinite (which also covers the infinite case).
            if contour_std < 1e-6 or not np.isfinite(contour_std):  # invalid shapes
                continue

            shapes.append(np.ndarray.flatten(fixed_contour).tolist())
            labels.append(self.cat_ids[anno['category_id']])
            bboxes.append(anno['bbox'])

        labels = np.array(labels)
        bboxes = np.array(bboxes, dtype=np.float32)
        shapes = np.array(shapes, dtype=np.float32)

        if len(bboxes) == 0:
            # Zero-sized placeholder so the array arithmetic below still works;
            # it is rejected by the box-size guard in the target loop.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
            shapes = np.zeros((1, self.n_vertices * 2), dtype=np.float32)
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(400, width)
            h_border = get_border(180, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h']))

        img = img.astype(np.float32) / 255.

        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        # Same crop as the image, but mapped onto the feature-map resolution.
        trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
                        dtype=np.float32)  # heatmap of centers
        occ_map = np.zeros((1, self.fmap_size['h'], self.fmap_size['w']),
                           dtype=np.float32)  # grayscale map for occlusion levels
        w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height of inmodal bboxes
        shapes_ = np.zeros((self.max_objs, self.n_vertices * 2), dtype=np.float32)  # gt amodal segmentation polygons
        center_offsets = np.zeros((self.max_objs, 2), dtype=np.float32)  # gt amodal mass centers to inmodal bbox center
        codes_ = np.zeros((self.max_objs, self.n_codes), dtype=np.float32)  # gt amodal coefficients
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression for quantization error
        inds = np.zeros((self.max_objs,), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

        # The per-object arrays have max_objs rows; objects beyond that are
        # dropped instead of raising IndexError on the row assignment.
        for k, (bbox, label, shape) in enumerate(zip(bboxes[:self.max_objs], labels, shapes)):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                # Flip the contour x-axis
                for m in range(self.n_vertices):
                    shape[2 * m] = width - shape[2 * m] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # Skip boxes that collapsed after cropping/clipping.  Written as a
            # negated ">=" so that a NaN extent is rejected as well.
            if not (h >= 1e-6 and w >= 1e-6):
                continue

            for m in range(self.n_vertices):  # apply scale and crop transform to shapes
                shape[2 * m:2 * m + 2] = affine_transform(shape[2 * m:2 * m + 2], trans_fmap)

            shape_clipped = np.reshape(shape, (self.n_vertices, 2))
            shape_clipped[:, 0] = np.clip(shape_clipped[:, 0], 0, self.fmap_size['w'] - 1)
            shape_clipped[:, 1] = np.clip(shape_clipped[:, 1], 0, self.fmap_size['h'] - 1)

            # Reverse the vertex order when check_clockwise_polygon reports False.
            if check_clockwise_polygon(shape_clipped):
                fixed_contour = shape_clipped.copy()
            else:
                fixed_contour = np.flip(shape_clipped, axis=0)

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            mass_center = np.mean(indexed_shape, axis=0)
            centered_shape = indexed_shape - mass_center

            obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            obj_c_int = obj_c.astype(np.int32)

            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou)))
            draw_umich_gaussian(hmap[label], obj_c_int, radius)
            shapes_[k] = centered_shape.reshape((1, -1))
            center_offsets[k] = mass_center - obj_c
            codes_[k], _ = fast_ista(centered_shape.reshape((1, -1)), self.dictionary,
                                     lmbda=self.sparse_alpha, max_iter=60)
            w_h_[k] = 1. * w, 1. * h
            regs[k] = obj_c - obj_c_int  # discretization error
            inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
            ind_masks[k] = 1

            # occlusion level map gt: overlapping objects accumulate
            occ_map[0] += self.polys_to_mask([np.ndarray.flatten(indexed_shape).tolist()], self.fmap_size['h'],
                                             self.fmap_size['w']) * 1.

        occ_map = np.clip(occ_map, 0, self.max_occ) / self.max_occ

        return {'image': img, 'shapes': shapes_, 'codes': codes_, 'offsets': center_offsets, 'occ_map': occ_map,
                'hmap': hmap, 'w_h_': w_h_, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
                'c': center, 's': scale, 'img_id': img_id}
Example #19
0
    def __getitem__(self, index):
        """Load one visible/LWIR image pair and build corner-heatmap targets.

        ``self.img_paths[index]`` is an annotation file name of the form
        ``<set>_<video>_<name>.txt``.  The colour image is read from the
        ``visible`` sub-directory and the grayscale image from ``lwir``.  The
        annotation file's first line is skipped; every other line is parsed as
        ``<label> x y w h ...``.  During training the labels ``person``,
        ``person?`` and ``people`` are kept, otherwise only ``person``.

        Both images are resized to ``self.img_size`` and, when
        ``self.split == 'train'``, flipped horizontally with probability 0.5.
        At most ``self.max_objs`` boxes contribute to the targets.

        Args:
            index: Position of the sample in ``self.img_paths``.

        Returns:
            dict with the keys
              * ``img_rgb`` / ``img_ir``: normalised images in [C, H, W] layout.
              * ``hmap_tl`` / ``hmap_br``: heatmaps of top-left / bottom-right
                box corners (only channel 0 is written).
              * ``regs_tl`` / ``regs_br``: fractional part of each corner on the
                feature map.
              * ``inds_tl`` / ``inds_br``: flattened feature-map index of each
                integer corner.
              * ``ind_masks``: 1 for the first ``num_objs`` rows, 0 elsewhere.
        """
        img_id = self.img_paths[index]
        img_set, img_vid, img_name = img_id.split("_", 2)
        img_name = img_name.replace("txt", "jpg")
        img_path = os.path.join(self.img_dir, img_set, img_vid)
        img_rgb = cv2.imread(os.path.join(img_path, "visible", img_name),
                             cv2.IMREAD_COLOR)
        img_ir = cv2.imread(os.path.join(img_path, "lwir", img_name),
                            cv2.IMREAD_GRAYSCALE)

        with open(os.path.join(self.annot_path, img_id)) as annot_file:
            # The first line of the annotation file is dropped.
            annot_data = [line.rstrip('\n') for line in annot_file][1:]

        if self.split == "train":
            wanted_labels = ("person", "person?", "people")
        else:
            wanted_labels = ("person",)

        bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
        if len(annot_data) != 0:
            bboxes = bboxes.repeat(len(annot_data), axis=0)
            for i, line in enumerate(annot_data):
                line_data = line.split()
                if line_data[0] not in wanted_labels:
                    # Row stays all-zero and is removed by the validity
                    # filter below.
                    continue
                bboxes[i, :] = list(map(int, line_data[1:5]))

        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        # resize image and bbox
        height, width = img_rgb.shape[:2]
        img_rgb = cv2.resize(img_rgb, (self.img_size['w'], self.img_size['h']))
        img_ir = cv2.resize(img_ir, (self.img_size['w'], self.img_size['h']))
        img_ir = np.expand_dims(img_ir, axis=2)
        bboxes[:, 0::2] *= self.img_size['w'] / width
        bboxes[:, 1::2] *= self.img_size['h'] / height

        # discard non-valid bboxes
        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, self.img_size['w'] - 1)
        bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, self.img_size['h'] - 1)
        keep_inds = np.logical_and((bboxes[:, 2] - bboxes[:, 0]) > 0,
                                   (bboxes[:, 3] - bboxes[:, 1]) > 0)
        bboxes = bboxes[keep_inds]

        # randomly flip image and bboxes
        if self.split == 'train' and np.random.uniform() > 0.5:
            img_rgb[:] = img_rgb[:, ::-1, :]
            img_ir[:] = img_ir[:, ::-1, :]
            bboxes[:, [0, 2]] = img_rgb.shape[1] - bboxes[:, [2, 0]] - 1

        img_rgb = img_rgb.astype(np.float32) / 255.
        img_ir = img_ir.astype(np.float32) / 255.

        # self.mean / self.std are indexed with four channels here: the first
        # three are applied to the colour image, the fourth to the LWIR image.
        img_rgb -= self.mean[0, 0, :3]
        img_rgb /= self.std[0, 0, :3]
        img_ir -= self.mean[0, 0, 3]
        img_ir /= self.std[0, 0, 3]
        img_rgb = img_rgb.transpose((2, 0, 1))  # [H, W, C] to [C, H, W]
        img_ir = img_ir.transpose((2, 0, 1))

        hmap_tl = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)
        hmap_br = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)

        regs_tl = np.zeros((self.max_objs, 2), dtype=np.float32)
        regs_br = np.zeros((self.max_objs, 2), dtype=np.float32)

        inds_tl = np.zeros((self.max_objs, ), dtype=np.int64)
        inds_br = np.zeros((self.max_objs, ), dtype=np.int64)

        num_objs = min(bboxes.shape[0], self.max_objs)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)
        ind_masks[:num_objs] = 1

        # Only the first num_objs boxes are used, matching ind_masks and the
        # max_objs-row target arrays (iterating all boxes would index past
        # their end when an image has more than max_objs boxes).
        for i, (xtl, ytl, xbr, ybr) in enumerate(bboxes[:num_objs]):
            fxtl = (xtl * self.fmap_size['w'] / self.img_size['w'])
            fytl = (ytl * self.fmap_size['h'] / self.img_size['h'])
            fxbr = (xbr * self.fmap_size['w'] / self.img_size['w'])
            fybr = (ybr * self.fmap_size['h'] / self.img_size['h'])

            ixtl = int(fxtl)
            iytl = int(fytl)
            ixbr = int(fxbr)
            iybr = int(fybr)

            if self.gaussian:
                # Box extent on the feature map; named box_* so the image
                # width/height read above are not shadowed.
                box_w = math.ceil((xbr - xtl) * self.fmap_size['w'] /
                                  self.img_size['w'])
                box_h = math.ceil((ybr - ytl) * self.fmap_size['h'] /
                                  self.img_size['h'])

                radius = max(
                    0, int(gaussian_radius((box_h, box_w),
                                           self.gaussian_iou)))

                draw_gaussian(hmap_tl[0], [ixtl, iytl], radius)
                draw_gaussian(hmap_br[0], [ixbr, iybr], radius)
            else:
                hmap_tl[0, iytl, ixtl] = 1
                hmap_br[0, iybr, ixbr] = 1

            regs_tl[i, :] = [fxtl - ixtl, fytl - iytl]
            regs_br[i, :] = [fxbr - ixbr, fybr - iybr]
            inds_tl[i] = iytl * self.fmap_size['w'] + ixtl
            inds_br[i] = iybr * self.fmap_size['w'] + ixbr

        return {
            'img_rgb': img_rgb,
            'img_ir': img_ir,
            'hmap_tl': hmap_tl,
            'hmap_br': hmap_br,
            'regs_tl': regs_tl,
            'regs_br': regs_br,
            'inds_tl': inds_tl,
            'inds_br': inds_br,
            'ind_masks': ind_masks
        }
Example #20
0
    def __getitem__(self, index):
        """Build the input tensor and dense-regression targets for one sample.

        The augmented image and its annotations come from
        ``self.get_img_ann(index, scale_lv=2)``, which yields the input array,
        a list of ``(bbox, cls_id, bbox2)`` tuples, the output width/height and
        a ``meta`` sequence whose first two items are returned as ``c``/``s``.
        NOTE(review): ``bbox2`` looks like an alternative box used when
        ``bbox`` is heavily clipped - confirm against ``get_img_ann``.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with ``input``, ``hm`` (per-class centre heatmap),
            ``dense_wh`` and ``dense_off`` (first and last two channels of the
            map filled by ``draw_dense_reg``) and ``dense_wh_mask``.
            ``cat_spec_wh``/``cat_spec_mask`` are added when
            ``opt.cat_spec_wh`` is set, ``reg`` when ``opt.reg_offset`` is set,
            and ``meta`` when ``opt.debug > 0`` or the split is not ``train``.
        """
        img_id = self.images[index]

        inp, ann_list, output_w, output_h, meta = self.get_img_ann(index,
                                                                   scale_lv=2)

        # TBD: Mosaic augmentation requires large input image size
        # Increase input image size from 512x512 to 800x800 or larger and
        # adjust the scale level to avoid the mosaic boundary to become
        # a significant boundary of objects
        #inp, ann_list, output_w, output_h, meta = self.mosaic_mix( index )

        if False:  # Augmentation visualization (manual debug toggle)
            img = inp.transpose(1, 2, 0)
            img = (img * self.std + self.mean) * 255
            for an in ann_list:
                bbox, cls_id, bbox2 = an
                bbox = bbox.astype(np.int32)
                bbox2 = bbox2.astype(np.int32)
                bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, img.shape[1])
                bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, img.shape[0])
                if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0:
                    cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                  (255, 0, 0), 3)
                if bbox2.shape[0] > 0:
                    cv2.rectangle(img, (bbox2[0], bbox2[1]),
                                  (bbox2[2], bbox2[3]), (0, 255, 0), 2)
            cv2.imwrite('temp_%d.jpg' % (index), img)

        num_objs = min(len(ann_list), self.max_objs)
        num_classes = self.num_classes
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_reg = np.zeros((4, output_h, output_w), dtype=np.float32)
        # `reg` is never filled below; it is returned as zeros when
        # opt.reg_offset is set.
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh_mask = np.zeros((4, output_h, output_w), dtype=np.float32)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        # The results of these two draws are not used; the calls are kept so
        # the sequence produced by the global NumPy RNG is unchanged.
        np.random.randint(output_w, size=(self.max_objs, 1))
        np.random.randint(output_h, size=(self.max_objs, 1))

        for k in range(num_objs):
            bbox, cls_id, bbox2 = ann_list[k]

            # In-place: the arrays held by ann_list are rescaled to the
            # output resolution.
            bbox /= self.opt.down_ratio
            bbox2 /= self.opt.down_ratio

            # oh/ow: extent before clipping; h/w: extent after clipping.
            oh, ow = bbox[3] - bbox[1], bbox[2] - bbox[0]
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            # If clipping removed more than ~10% of either side and a second
            # box is available, use the second box instead.
            if (h / (oh + 0.01) < 0.9 or w /
                (ow + 0.01) < 0.9) and bbox2.shape[0] > 0:
                bbox = bbox2
                h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            #get center of box
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)

            if (h > 2 or h / (oh + 0.01) > 0.5) and (w > 2 or w /
                                                     (ow + 0.01) > 0.5):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius

                draw_dense_reg(dense_reg, dense_wh_mask, ct_int, bbox, radius)
                draw_gaussian(hm[cls_id], ct_int, radius)

                # Record the box size before copying it into the per-class
                # slot; without this assignment cat_spec_wh stays all zero.
                wh[k] = 1. * w, 1. * h
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        dense_wh = dense_reg[:2, :, :]
        dense_off = dense_reg[2:, :, :]

        ret = {
            'input': inp,
            'hm': hm,
            'dense_wh': dense_wh,
            'dense_off': dense_off,
            'dense_wh_mask': dense_wh_mask[:2]
        }
        if self.opt.cat_spec_wh:
            # `ret` has no 'wh' entry in this dataset, so there is nothing to
            # delete here (a `del ret['wh']` would raise KeyError).
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {
                'c': meta[0],
                's': meta[1],
                'gt_det': gt_det,
                'img_id': img_id
            }
            ret['meta'] = meta
        return ret
    def __getitem__(self, index):
        """Load one image and build centre-point detection targets for it.

        Annotations are fetched through ``self.coco``; their ``bbox`` entries
        are converted from xywh to xyxy.  When ``self.split == 'train'`` a
        random scale, a random crop centre and (with probability 0.5) a
        horizontal flip are applied, followed by colour augmentation.  Boxes
        are mapped onto the ``self.fmap_size`` grid with the same crop.  At
        most ``self.max_objs`` annotations are used.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with the keys
              * ``image``: normalised image in [C, H, W] layout.
              * ``hmap``: per-class centre heatmap.
              * ``w_h_``: box width/height on the feature map.
              * ``regs``: fractional part lost when the centre is cast to int.
              * ``inds``: flattened feature-map index of each integer centre.
              * ``ind_masks``: 1 for every row that holds a real object.
              * ``c``, ``s``: crop centre and scale used for the affine transform.
              * ``img_id``: the image id taken from ``self.images``.
        """
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)
        labels = np.array(
            [self.cat_ids[anno['category_id']] for anno in annotations])
        bboxes = np.array([anno['bbox'] for anno in annotations],
                          dtype=np.float32)
        if len(bboxes) == 0:
            # Zero-sized placeholder so the slicing below still works; it is
            # rejected by the `h > 0 and w > 0` check in the target loop.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])
        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0

        flipped = False
        if self.split == 'train':
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        img = (img.astype(np.float32) / 255.)
        if self.split == 'train':
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        # Same crop as the image, but mapped onto the feature-map resolution.
        trans_fmap = get_affine_transform(
            center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']])

        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # The per-object arrays have max_objs rows; annotations beyond that
        # are dropped instead of raising IndexError on the row assignment.
        for k, (bbox, label) in enumerate(
                zip(bboxes[:self.max_objs], labels)):
            if flipped:
                # Mirror x1/x2 and swap them so that x1 <= x2 still holds.
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)
                obj_c_int = obj_c.astype(np.int32)

                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                draw_umich_gaussian(hmap[label], obj_c_int, radius)
                w_h_[k] = 1. * w, 1. * h
                regs[k] = obj_c - obj_c_int  # discretization error
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                ind_masks[k] = 1

        return {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
Example #22
0
    def __getitem__(self, index):
        """Build the network input and training targets for one image.

        Loads the image and its non-crowd annotations of a valid category,
        applies the train-only crop/shift/scale, rotation and flip
        augmentation, and encodes every kept object as centre-heatmap, size,
        offset, keypoint and segmentation targets at ``cfg.MODEL.OUTPUT_RES``.

        Args:
            index: Position into ``self.images``.

        Returns:
            dict with keys ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
            ``seg`` and, depending on the ``cfg.LOSS`` switches, ``hps`` /
            ``hps_mask`` or ``dense_hps`` / ``dense_hps_mask``, ``reg``,
            ``hm_hp`` and ``hp_offset`` / ``hp_ind`` / ``hp_mask``. A ``meta``
            entry is added when ``cfg.DEBUG > 0`` or the split is not
            ``'train'``.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        # Keep only non-crowd annotations whose category is in _valid_ids.
        anns = [
            ann for ann in anns
            if ann['category_id'] in self._valid_ids and ann['iscrowd'] != 1
        ]
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        width = img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if self.cfg.DATASET.RANDOM_CROP:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.cfg.DATASET.SCALE
                cf = self.cfg.DATASET.SHIFT
                # The shift uses the un-jittered scale, so it is applied
                # before s is rescaled.
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.cfg.DATASET.AUG_ROT:
                rf = self.cfg.DATASET.ROTATE
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.cfg.DATASET.FLIP:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES])
        inp = cv2.warpAffine(
            img,
            trans_input, (self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - np.array(self.cfg.DATASET.MEAN).astype(
            np.float32)) / np.array(self.cfg.DATASET.STD).astype(np.float32)
        inp = inp.transpose(2, 0, 1)

        output_res = self.cfg.MODEL.OUTPUT_RES
        num_joints = self.num_joints
        # Keypoints follow the rotated transform; boxes and segmentation
        # masks use the un-rotated one.
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        trans_seg_output = get_affine_transform(c, s, 0,
                                                [output_res, output_res])
        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        seg = np.zeros((self.max_objs, output_res, output_res),
                       dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            segment = self.coco.annToMask(ann)
            if flipped:
                # Mirror box, keypoints and mask, then swap the paired
                # keypoints listed in flip_idx.
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
                segment = segment[:, ::-1]

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            segment = cv2.warpAffine(segment,
                                     trans_seg_output,
                                     (output_res, output_res),
                                     flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                # NOTE(review): `hm_gauss` is lower-case unlike the other cfg
                # keys read here -- confirm it exists on cfg.
                radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                # Segmentation target: every pixel outside the box padded by
                # pad_rate is overwritten with 255.
                # NOTE(review): the window coordinates are doubled before
                # indexing an output_res-sized map -- confirm this is intended.
                pad_rate = 0.3
                half_w = (1 + pad_rate) * w / 2
                half_h = (1 + pad_rate) * h / 2
                segment_mask = np.ones_like(segment)
                # `np.int` was removed in NumPy 1.24; builtin int is the
                # dtype it used to alias.
                x = (np.clip([ct[0] - half_w, ct[0] + half_w], 0,
                             output_res - 1) * 2).astype(int)
                y = (np.clip([ct[1] - half_h, ct[1] + half_h], 0,
                             output_res - 1) * 2).astype(int)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment_mask == 1] = 255
                seg[k] = segment

                # Keypoint targets. An object without any labelled keypoint
                # gets a single 0.9999 heatmap pixel and is dropped from the
                # regression mask.
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.cfg.hm_gauss \
                            if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                            pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.cfg.LOSS.DENSE_HP:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # Row layout: 4 box coords, score, 2 * num_joints keypoint
                # coords, class id.
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            # Rotated samples: the whole heatmap is set to 0.9999 and the
            # box/keypoint regression masks are cleared.
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask,
            'seg': seg
        }
        if self.cfg.LOSS.DENSE_HP:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            # Duplicate the per-joint mask so it lines up with the (x, y)
            # channel pairs of dense_kps.
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.cfg.LOSS.REG_OFFSET:
            ret.update({'reg': reg})
        if self.cfg.LOSS.HM_HP:
            ret.update({'hm_hp': hm_hp})
        if self.cfg.LOSS.REG_HP_OFFSET:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.cfg.DEBUG > 0 or not self.split == 'train':
            # The empty placeholder has the same row width as a real
            # detection row (40 for 17 joints).
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                   np.zeros((1, 6 + num_joints * 2), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #23
0
    def __get_rotated_coco(self, img, anns, num_objs):
        """Build rotated-box (and optional keypoint) targets for one image.

        Each annotation's rotated box is turned into corner keypoints so the
        image augmentation and resizing move boxes and keypoints together.
        The augmented corners are then refit with ``cv2.minAreaRect`` and
        encoded as heatmap / size+angle / offset targets.

        Args:
            img: Image array passed to the augmentation pipeline.
            anns: Sequence of annotation dicts; the first ``num_objs`` are
                used. An annotation missing ``'keypoints'`` gets an all-zero
                entry written into it when ``self.num_keypoints > 0``.
            num_objs: Number of annotations to encode.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
            ``reg``, ``gt_dets`` and ``gt_areas``; plus ``kps``, ``gt_kps``
            and ``kp_reg_mask`` when ``self.num_keypoints > 0``.
        """
        kpts = []
        kpts_tmp = []
        for k in range(num_objs):
            ann = anns[k]
            ann_rotated = get_annotation_with_angle(ann)
            rot = rotate_bbox(*ann_rotated)
            kpts.extend([Keypoint(*x) for x in rot])

            if self.num_keypoints > 0:
                if 'keypoints' not in ann:
                    ann['keypoints'] = np.zeros((3 * self.num_keypoints, ))

                kpt = [
                    Keypoint(*x)
                    for x in np.array(ann['keypoints']).reshape(-1, 3)[:, :2]
                ]
                kpts_tmp.extend(kpt)

        # Box corners come first, object keypoints after; idx_boxes marks the
        # split so they can be separated again after augmentation.
        idx_boxes = len(kpts)
        if self.num_keypoints > 0:
            kpts.extend(kpts_tmp)

        kpts = KeypointsOnImage(kpts, shape=img.shape)

        if self.augmentation is not None:
            img_aug, kpts_aug = self.augmentation(image=img, keypoints=kpts)
        else:
            img_aug, kpts_aug = np.copy(img), kpts.copy()

        img_aug, kpts_aug = self.resize(image=img_aug, keypoints=kpts_aug)

        img = (img_aug.astype(np.float32) / 255.)
        inp = (img - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = self.input_size[1] // self.down_ratio
        output_w = self.input_size[0] // self.down_ratio
        num_classes = self.num_classes

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_detections, 3), dtype=np.float32)
        reg = np.zeros((self.max_detections, 2), dtype=np.float32)
        ind = np.zeros((self.max_detections), dtype=np.int64)
        reg_mask = np.zeros((self.max_detections), dtype=np.uint8)
        gt_det = np.zeros(
            (self.max_detections, 7 if self.use_rotated_boxes else 6),
            dtype=np.float32)
        gt_areas = np.zeros((self.max_detections), dtype=np.float32)

        if self.num_keypoints > 0:
            kp = np.zeros((self.max_detections, self.num_keypoints * 2),
                          dtype=np.float32)
            gt_kp = np.zeros((self.max_detections, self.num_keypoints, 2),
                             dtype=np.float32)
            kp_reg_mask = np.zeros(
                (self.max_detections, self.num_keypoints * 2), dtype=np.uint8)

        kpts_aug = self.resize_out(keypoints=kpts_aug)

        box_kpts_aug, kpts_aug = kpts_aug[:idx_boxes], kpts_aug[idx_boxes:]
        assert num_objs == len(box_kpts_aug) // 4

        for k in range(num_objs):
            ann = anns[k]
            # Clip the four augmented corners to the output map, then refit
            # a minimum-area rectangle through them.
            points = [[
                np.clip(p.x, 0, output_w - 1),
                np.clip(p.y, 0, output_h - 1)
            ] for p in box_kpts_aug[k * 4:k * 4 + 4]]

            points = np.array(points).astype(np.float32)
            cv_ct, cv_wh, cv_angle = cv2.minAreaRect(points)

            # Degenerate box (collapsed by clipping/augmentation): skip.
            if cv_wh[0] == 0 or cv_wh[1] == 0:
                continue

            cx, cy, w, h, angle = get_annotation_with_angle({
                'rbbox':
                np.array([cv_ct[0], cv_ct[1], cv_wh[0], cv_wh[1], cv_angle])
            })
            ct = np.array((cx, cy))

            cls_id = int(self.cat_mapping[ann['category_id']])

            if h > 0 and w > 0:
                radius = gaussian_radius((np.ceil(h), np.ceil(w)))
                radius = max(0, int(radius))
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = w, h, angle
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                gt_det[k] = ([ct[0], ct[1], w, h, angle, 1, cls_id])

                if self.num_keypoints > 0:
                    valid = np.array(ann["keypoints"]).reshape(-1, 3)[:, -1]
                    first = k * self.num_keypoints
                    for i, p in enumerate(
                            kpts_aug[first:first + self.num_keypoints]):
                        kp[k][i * 2] = p.x - ct_int[0]
                        kp[k][i * 2 + 1] = p.y - ct_int[1]

                        # Shape tuples are (height, width); the original
                        # passed the width twice, which mis-masked keypoints
                        # on non-square output maps.
                        is_valid = valid[i] == 2 and not p.is_out_of_image(
                            (output_h, output_w))
                        kp_reg_mask[k, i * 2] = int(is_valid)
                        kp_reg_mask[k, i * 2 + 1] = int(is_valid)
                        gt_kp[k][i] = p.x, p.y

                if "area" not in ann:
                    gt_areas[k] = w * h
                else:
                    gt_areas[k] = ann["area"]

        gt_det = np.array(gt_det,
                          dtype=np.float32) if len(gt_det) > 0 else np.zeros(
                              (1, 7), dtype=np.float32)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'gt_dets': gt_det,
            'gt_areas': gt_areas,
        }

        if self.num_keypoints > 0:
            ret['kps'] = kp
            ret['gt_kps'] = gt_kp
            ret['kp_reg_mask'] = kp_reg_mask

        return ret
Example #24
0
    def __getitem__(self, index):
        """Build the network input, detection targets and instance masks.

        Loads the image and its non-crowd annotations, applies the train-only
        crop/shift/scale and flip augmentation to image and masks alike, and
        encodes each kept object as heatmap / size / offset targets at the
        down-sampled output resolution.

        Args:
            index: Position into ``self.images``.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``masks`` and
            ``gt_bbox_lbl``, plus ``wh`` or its ``dense_wh`` / ``cat_spec_wh``
            replacement, ``reg`` when ``opt.reg_offset`` is set, and ``meta``
            when ``opt.debug > 0`` or the split is not ``'train'``. When the
            image has no non-crowd annotation, ``masks`` is an empty
            ``(0, input_h, input_w)`` array and ``gt_bbox_lbl`` an empty list.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        assert os.path.exists(img_path), 'Image path does not exist: {}'.format(img_path)

        # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        target = self.coco.loadAnns(ids=ann_ids)

        # Drop crowd annotations: they mark a group of objects of a class
        # without a separate annotation per object.
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]

        # masks stays None when there is nothing to segment; the original
        # left it unbound and failed later with UnboundLocalError.
        masks = None
        if len(target) > 0:
            # Pool all the masks for this image into one [height, width,
            # num_objects] array; the channels-last layout is what the warp
            # below operates on.
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)
            masks = masks.transpose(1, 2, 0)

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            # NOTE(review): presumably rounds the size up to a multiple of
            # (pad + 1) -- this only holds if pad is 2**n - 1; confirm.
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                if masks is not None:
                    masks = masks[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        if self.rgb:
            inp = inp[..., ::-1]
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        if masks is not None:
            d1 = masks.shape[2]
            masks = cv2.warpAffine(masks, trans_input,
                                   (input_w, input_h),
                                   flags=cv2.INTER_LINEAR)
            # warpAffine drops the channel axis for a single mask; restore it.
            masks = np.expand_dims(masks, 2) if masks.ndim != 3 else masks
            d2 = masks.shape[2]
            assert d1 == d2
            masks = masks.transpose(2, 0, 1)
            masks = (masks >= 0.5).astype(np.uint8)
        else:
            masks = np.zeros((0, input_h, input_w), dtype=np.uint8)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        segm_masks = []
        gt_det = []
        num_objs = min(len(target), self.max_objs)
        for k in range(num_objs):
            ann = target[k]

            # convert bboxes to point_form (xmin, ymin, xmax, ymax)
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # After augmentation some masks will be empty; skip those objects.
            if h > 0 and w > 0 and masks[k].sum() > 0.0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                det = [ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, cls_id]
                gt_det.append(det)
                segm_masks.append(masks[k])

        # Keep only the masks of objects that survived augmentation.
        if len(segm_masks) > 0:
            masks = np.stack(segm_masks)
            gt_det = np.stack(gt_det)

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
               'wh': wh, 'masks': masks, 'gt_bbox_lbl': gt_det}

        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
Example #25
0
    def __getitem__(self, files_index):
        """Build detection and identity targets for one multi-dataset sample.

        ``files_index`` is a global index over several concatenated datasets;
        ``self.cds`` holds each dataset's starting offset. Label rows are read
        as ``[class, track id, cx, cy, w, h]``.

        Args:
            files_index: Global sample index across all datasets.

        Returns:
            dict with ``input``, ``hm``, ``reg_mask``, ``ind``, ``wh``,
            ``reg``, ``ids`` and ``bbox``. At most ``self.max_objs`` labels
            are encoded; any further rows are ignored.
        """
        # Pick the last dataset whose start offset does not exceed the index.
        dataset_names = list(self.label_files.keys())
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = dataset_names[i]
                start_index = c

        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        imgs, labels, img_path, (input_h, input_w) = self.get_data(
            img_path, label_path)
        # Shift per-dataset track ids into a global id space; ids <= -1 are
        # left untouched.
        for i, _ in enumerate(labels):
            if labels[i, 1] > -1:
                labels[i, 1] += self.tid_start_index[ds]

        output_h = imgs.shape[1] // self.opt.down_ratio
        output_w = imgs.shape[2] // self.opt.down_ratio
        num_classes = self.num_classes
        # The target buffers hold max_objs rows; clamping avoids an
        # IndexError on images with more labels than that.
        num_objs = min(labels.shape[0], self.max_objs)
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        if self.opt.ltrb:
            wh = np.zeros((self.max_objs, 4), dtype=np.float32)
        else:
            wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs, ), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs, ), dtype=np.uint8)
        ids = np.zeros((self.max_objs, ), dtype=np.int64)
        bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian
        for k in range(num_objs):
            label = labels[k]
            bbox = label[2:]
            cls_id = int(label[0])
            # Scale the box to output-map pixels.
            # NOTE(review): presumably the stored coordinates are normalised
            # to [0, 1] -- confirm against get_data.
            bbox[[0, 2]] = bbox[[0, 2]] * output_w
            bbox[[1, 3]] = bbox[[1, 3]] * output_h
            # Corner-form box computed from the unclipped centre.
            bbox_amodal = copy.deepcopy(bbox)
            bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
            bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
            bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
            bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)
            h = bbox[3]
            w = bbox[2]

            # Corner-form box computed from the clipped centre.
            bbox_xy = copy.deepcopy(bbox)
            bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
            bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
            bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
            bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = 6 if self.opt.mse_loss else radius
                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                if self.opt.ltrb:
                    # Distances from the centre to the left/top/right/bottom
                    # edges of the unclipped box.
                    wh[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                            bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
                else:
                    wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                ids[k] = label[1]
                bbox_xys[k] = bbox_xy

        ret = {
            'input': imgs,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'ids': ids,
            'bbox': bbox_xys
        }
        return ret
Example #26
0
    def _get_pre_dets(self, anns, trans_input, trans_output):
        """Collect the previous frame's detections, with random disturbance.

        Annotations that are crowd, have a non-positive category, or a
        category above ``opt.num_classes`` define ignore regions. Every other
        annotation whose centre falls outside all ignore regions contributes
        a jittered centre, its track id, its output-space size and boxes, and
        (when ``opt.pre_hm`` is set) a gaussian on the prior heatmap.

        Args:
            anns: Iterable of annotation dicts for the previous frame.
            trans_input: Affine transform into input-resolution coordinates.
            trans_output: Affine transform passed to ``_get_bbox_output``.

        Returns:
            Tuple ``(pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes,
            pre_bbox_amodals)``; ``pre_hm`` is ``None`` when ``opt.pre_hm``
            is falsy.
        """
        opt = self.opt
        hm_h, hm_w = opt.input_h, opt.input_w
        down_ratio = opt.down_ratio
        return_hm = opt.pre_hm

        pre_hm = None
        if return_hm:
            pre_hm = np.zeros((1, hm_h, hm_w), dtype=np.float32)

        pre_cts = []
        pre_whs = []
        track_ids = []
        pre_bboxes = []
        pre_bbox_amodals = []

        def _is_crowd(a):
            return 'iscrowd' in a and a['iscrowd'] > 0

        # First pass: gather the boxes of annotations that must be ignored.
        ignore_regions = []
        for ann in anns:
            cls_id = int(ann['category_id'])
            if cls_id > self.opt.num_classes or cls_id <= 0 or _is_crowd(ann):
                region, _ = self._get_bbox_output(
                    ann['bbox'], trans_output, hm_h, hm_w)
                ignore_regions.append(region)

        # Second pass: encode the remaining annotations.
        for ann in anns:
            cls_id = int(ann['category_id'])
            if cls_id > self.opt.num_classes or cls_id <= -99 or \
                    _is_crowd(ann):
                continue

            # Box in input-resolution coordinates, clipped to the map.
            bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox[:2] = affine_transform(bbox[:2], trans_input)
            bbox[2:] = affine_transform(bbox[2:], trans_input)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)))))
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                dtype=np.float32)
            ct0 = ct.copy()

            # Skip detections whose centre lies inside an ignore region.
            if any(area[0] <= ct[0] <= area[2] and area[1] <= ct[1] <= area[3]
                   for area in ignore_regions):
                continue

            # Jitter the centre and randomly mark the detection as lost.
            ct[0] = ct[0] + np.random.randn() * self.opt.hm_disturb * w
            ct[1] = ct[1] + np.random.randn() * self.opt.hm_disturb * h
            conf = 1 if np.random.random() > self.opt.lost_disturb else 0
            ct_int = ct.astype(np.int32)

            # A lost detection keeps the jittered centre; otherwise the
            # undisturbed one is stored (both in output coordinates).
            pre_cts.append(ct / down_ratio if conf == 0 else ct0 / down_ratio)
            track_ids.append(ann['track_id'] if 'track_id' in ann else -1)

            if return_hm:
                draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)

            # Optionally draw a second, slightly shifted peak.
            if np.random.random() < self.opt.fp_disturb and return_hm:
                ct2 = ct0.copy()
                # Hard code heatmap disturb ratio, haven't tried other numbers.
                ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
                ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
                draw_umich_gaussian(pre_hm[0], ct2.astype(np.int32), radius,
                                    k=conf)

            # Box and size in output coordinates.
            bbox_out, bbox_amodal = self._get_bbox_output(
                ann['bbox'], trans_output)
            pre_bboxes.append(np.array(bbox_out))
            pre_bbox_amodals.append(np.array(bbox_amodal))
            w_out = bbox_out[2] - bbox_out[0]
            h_out = bbox_out[3] - bbox_out[1]
            pre_whs.append(np.array([w_out, h_out], dtype=np.float32))

        return (pre_hm, pre_cts, track_ids, pre_whs, pre_bboxes,
                pre_bbox_amodals)
Example #27
0
    def _add_instance(self,
                      ret,
                      gt_det,
                      k,
                      cls_id,
                      bbox,
                      bbox_amodal,
                      ann,
                      trans_output,
                      aug_s,
                      calib,
                      pre_cts=None,
                      track_ids=None):
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h <= 0 or w <= 0:
            return
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        ret['cat'][k] = cls_id - 1
        ret['mask'][k] = 1
        if 'wh' in ret:
            ret['wh'][k] = 1. * w, 1. * h
            ret['wh_mask'][k] = 1
        ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
        if 'reg' in ret:
            ret['reg'][k] = ct - ct_int
            ret['reg_mask'][k] = 1
        draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)

        gt_det['bboxes'].append(
            np.array(
                [ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2],
                dtype=np.float32))
        gt_det['scores'].append(1)
        gt_det['clses'].append(cls_id - 1)
        gt_det['cts'].append(ct)

        if 'tracking' in self.opt.heads:
            if ann['track_id'] in track_ids and ann['track_id'] >= 0:
                pre_ct = pre_cts[track_ids.index(ann['track_id'])]
                ret['tracking_mask'][k] = 1
                ret['tracking'][k] = pre_ct - ct_int
                gt_det['tracking'].append(ret['tracking'][k])
            else:
                gt_det['tracking'].append(np.zeros(2, np.float32))

        if 'ltrb' in self.opt.heads:
            ret['ltrb'][k] = bbox[0] - ct_int[0], bbox[1] - ct_int[1], \
              bbox[2] - ct_int[0], bbox[3] - ct_int[1]
            ret['ltrb_mask'][k] = 1

        if 'ltrb_amodal' in self.opt.heads:
            ret['ltrb_amodal'][k] = \
              bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
              bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
            ret['ltrb_amodal_mask'][k] = 1
            gt_det['ltrb_amodal'].append(bbox_amodal)

        if 'nuscenes_att' in self.opt.heads:
            if ('attributes' in ann) and ann['attributes'] > 0:
                att = int(ann['attributes'] - 1)
                ret['nuscenes_att'][k][att] = 1
                ret['nuscenes_att_mask'][k][self.nuscenes_att_range[att]] = 1
            gt_det['nuscenes_att'].append(ret['nuscenes_att'][k])

        if 'velocity' in self.opt.heads:
            if ('velocity' in ann) and min(ann['velocity']) > -1000:
                ret['velocity'][k] = np.array(ann['velocity'], np.float32)[:3]
                ret['velocity_mask'][k] = 1
            gt_det['velocity'].append(ret['velocity'][k])

        if 'hps' in self.opt.heads:
            self._add_hps(ret, k, ann, gt_det, trans_output, ct_int, bbox, h,
                          w)

        if 'rot' in self.opt.heads:
            self._add_rot(ret, ann, k, gt_det)

        if 'dep' in self.opt.heads:
            if 'depth' in ann:
                ret['dep_mask'][k] = 1
                ret['dep'][k] = ann['depth'] * aug_s
                gt_det['dep'].append(ret['dep'][k])
            else:
                gt_det['dep'].append(2)

        if 'dim' in self.opt.heads:
            if 'dim' in ann:
                ret['dim_mask'][k] = 1
                ret['dim'][k] = ann['dim']
                gt_det['dim'].append(ret['dim'][k])
            else:
                gt_det['dim'].append([1, 1, 1])

        if 'amodel_offset' in self.opt.heads:
            if 'amodel_center' in ann:
                amodel_center = affine_transform(ann['amodel_center'],
                                                 trans_output)
                ret['amodel_offset_mask'][k] = 1
                ret['amodel_offset'][k] = amodel_center - ct_int
                gt_det['amodel_offset'].append(ret['amodel_offset'][k])
            else:
                gt_det['amodel_offset'].append([0, 0])
Example #28
0
    def __getitem__(self, index):
        """Load one sample and build its heatmap-based detection targets.

        Reads the image and its COCO annotations, applies random scale /
        centre jitter, horizontal flip and colour augmentation when
        ``self.split == 'train'``, warps the image to ``self.img_size`` and
        encodes the boxes at ``self.fmap_size`` resolution.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with keys:
                'image'     float32 image, normalised, laid out [C, H, W]
                'hmap'      (num_classes, fmap_h, fmap_w) class heatmaps
                'w_h_'      (max_objs, 2) box width / height on the feature map
                'regs'      (max_objs, 2) sub-pixel centre offsets
                'inds'      (max_objs,) flattened feature-map index of centres
                'ind_masks' (max_objs,) 1 for rows that hold a real object
                'c', 's'    centre and scale used for the affine transform
                'img_id'    id of the image

        Raises:
            OSError: if the image file cannot be read.
        """
        img_id = self.images[index]
        img_path = os.path.join(
            self.img_dir,
            self.coco.loadImgs(ids=[img_id])[0]['file_name'])
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        annotations = self.coco.loadAnns(ids=ann_ids)

        labels = np.array(
            [self.cat_ids[anno['category_id']] for anno in annotations])
        bboxes = np.array([anno['bbox'] for anno in annotations],
                          dtype=np.float32)

        if len(bboxes) == 0:
            # No annotations: use a single zero-area placeholder box. It is
            # discarded by the `h > 0 and w > 0` test in the loop below.
            bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
            labels = np.array([[0]])

        bboxes[:, 2:] += bboxes[:, :2]  # xywh to xyxy

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError('failed to read image: {}'.format(img_path))
        height, width = img.shape[0], img.shape[1]
        # Centre of the image, used as the origin of the affine transform.
        center = np.array([width / 2., height / 2.],
                          dtype=np.float32)  # center of image
        scale = max(height, width) * 1.0  # scale of the affine transform

        flipped = False
        if self.split == 'train':
            # Pick a random scale factor for this sample.
            scale = scale * np.random.choice(self.rand_scales)
            w_border = get_border(128, width)
            h_border = get_border(128, height)
            center[0] = np.random.randint(low=w_border, high=width - w_border)
            center[1] = np.random.randint(low=h_border, high=height - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                center[0] = width - center[0] - 1

        # Affine-warp the image to the network input size.
        trans_img = get_affine_transform(
            center, scale, 0, [self.img_size['w'], self.img_size['h']])
        img = cv2.warpAffine(img, trans_img,
                             (self.img_size['w'], self.img_size['h']))

        # Scale pixel values to [0, 1].
        img = (img.astype(np.float32) / 255.)
        if self.split == 'train':
            # Colour augmentation, applied in place on the float image.
            color_aug(self.data_rng, img, self.eig_val, self.eig_vec)

        img -= self.mean
        img /= self.std
        img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

        # Same transform, but targeting the feature-map resolution used for
        # the ground-truth heatmap (presumably a downsampled grid -- confirm
        # against where fmap_size is set).
        trans_fmap = get_affine_transform(
            center, scale, 0,
            [self.fmap_size['w'], self.fmap_size['h']])

        # The three main regression targets.
        hmap = np.zeros(
            (self.num_classes, self.fmap_size['h'], self.fmap_size['w']),
            dtype=np.float32)  # heatmap
        w_h_ = np.zeros((self.max_objs, 2),
                        dtype=np.float32)  # width and height
        regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression

        # Flattened feature-map index of every object centre.
        inds = np.zeros((self.max_objs, ), dtype=np.int64)
        # Marks which rows of the arrays above are populated.
        ind_masks = np.zeros((self.max_objs, ), dtype=np.uint8)

        # The per-object arrays only have max_objs rows, so annotations beyond
        # that are dropped; indexing row k would otherwise raise IndexError.
        for k, (bbox, label) in enumerate(
                zip(bboxes[:self.max_objs], labels[:self.max_objs])):
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1

            # Map both box corners onto the feature map.
            bbox[:2] = affine_transform(bbox[:2], trans_fmap)
            bbox[2:] = affine_transform(bbox[2:], trans_fmap)
            # Clamp to the feature-map bounds.
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
            # Box height and width after transform and clamping.
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if h > 0 and w > 0:
                obj_c = np.array([(bbox[0] + bbox[2]) / 2,
                                  (bbox[1] + bbox[3]) / 2],
                                 dtype=np.float32)  # float centre
                obj_c_int = obj_c.astype(np.int32)  # integer centre
                # Gaussian radius derived from the box size and gaussian_iou.
                radius = max(
                    0,
                    int(
                        gaussian_radius((math.ceil(h), math.ceil(w)),
                                        self.gaussian_iou)))
                # Splat the Gaussian onto this class's heatmap.
                draw_umich_gaussian(hmap[label], obj_c_int, radius)

                w_h_[k] = 1. * w, 1. * h

                # Offset lost by rounding the centre to an integer cell.
                regs[k] = obj_c - obj_c_int  # discretization error
                # Flattened position of the centre: fmap_w * cy + cx.
                inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0]
                # Flag row k as holding a valid object.
                ind_masks[k] = 1

        return {
            'image': img,
            'hmap': hmap,
            'w_h_': w_h_,
            'regs': regs,
            'inds': inds,
            'ind_masks': ind_masks,
            'c': center,
            's': scale,
            'img_id': img_id
        }
Example #29
0
    def __getitem__(self, index):
        """Load one sample and build heatmap, depth, size and rotation targets.

        The image is warped to (opt.input_w, opt.input_h); annotations are
        projected to (opt.output_w, opt.output_h) and encoded as a class
        heatmap plus per-object depth, dimension and two-bin orientation
        targets. For the 'train' split a random scale / shift jitter is
        applied with probability opt.aug_ddd. No colour augmentation is
        applied here.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            dict with 'input', 'hm', 'dep', 'dim', 'ind', 'rotbin', 'rotres',
            'reg_mask', 'rot_mask'; plus 'wh' when opt.reg_bbox, 'reg' when
            opt.reg_offset, and 'meta' when opt.debug > 0 or 'train' is not
            part of ``self.split``.
        """
        image_id = self.images[index]
        info = self.coco.loadImgs(ids=[image_id])[0]
        image_path = os.path.join(self.img_dir, info['file_name'])
        image = cv2.imread(image_path)
        # Per-image calibration wins over the dataset-wide default.
        calib = (np.array(info['calib'], dtype=np.float32)
                 if 'calib' in info else self.calib)

        img_h, img_w = image.shape[:2]
        center = np.array([img_w / 2., img_h / 2.])
        if self.opt.keep_res:
            scale = np.array([self.opt.input_w, self.opt.input_h],
                             dtype=np.int32)
        else:
            scale = np.array([img_w, img_h], dtype=np.int32)

        augmented = False
        if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
            augmented = True
            scale_jit = self.opt.scale
            shift_jit = self.opt.shift
            scale = scale * np.clip(np.random.randn() * scale_jit + 1,
                                    1 - scale_jit, 1 + scale_jit)
            # Shift x first, then y, each by a clipped Gaussian fraction of
            # the corresponding image extent.
            for axis, extent in enumerate((img_w, img_h)):
                center[axis] += extent * np.clip(
                    np.random.randn() * shift_jit, -2 * shift_jit,
                    2 * shift_jit)

        trans_input = get_affine_transform(
            center, scale, 0, [self.opt.input_w, self.opt.input_h])
        warped = cv2.warpAffine(image,
                                trans_input,
                                (self.opt.input_w, self.opt.input_h),
                                flags=cv2.INTER_LINEAR)
        inp = ((warped.astype(np.float32) / 255.) - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        num_classes = self.opt.num_classes
        trans_output = get_affine_transform(
            center, scale, 0, [self.opt.output_w, self.opt.output_h])

        max_objs = self.max_objs
        hm = np.zeros((num_classes, self.opt.output_h, self.opt.output_w),
                      dtype=np.float32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        dep = np.zeros((max_objs, 1), dtype=np.float32)
        rotbin = np.zeros((max_objs, 2), dtype=np.int64)
        rotres = np.zeros((max_objs, 2), dtype=np.float32)
        dim = np.zeros((max_objs, 3), dtype=np.float32)
        ind = np.zeros(max_objs, dtype=np.int64)
        reg_mask = np.zeros(max_objs, dtype=np.uint8)
        rot_mask = np.zeros(max_objs, dtype=np.uint8)

        anns = self.coco.loadAnns(ids=self.coco.getAnnIds(imgIds=[image_id]))
        num_objs = min(len(anns), max_objs)
        if self.opt.mse_loss:
            draw_gaussian = draw_msra_gaussian
        else:
            draw_gaussian = draw_umich_gaussian

        gt_det = []
        for k, ann in enumerate(anns[:num_objs]):
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if cls_id <= -99:
                continue

            # Project the box to output resolution and clamp it to the map.
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if not (h > 0 and w > 0):
                continue

            radius = max(0, int(gaussian_radius((h, w))))
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)

            if cls_id < 0:
                # Negative ids are painted into the heatmap with 0.9999 and
                # produce no regression targets: -1 covers every class, any
                # other value maps to the single class (-cls_id - 2).
                if cls_id == -1:
                    ignore_id = list(range(num_classes))
                else:
                    ignore_id = [-cls_id - 2]
                if self.opt.rect_mask:
                    hm[ignore_id,
                       int(bbox[1]):int(bbox[3]) + 1,
                       int(bbox[0]):int(bbox[2]) + 1] = 0.9999
                else:
                    for cc in ignore_id:
                        draw_gaussian(hm[cc], ct, radius)
                    hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
                continue

            draw_gaussian(hm[cls_id], ct, radius)
            wh[k] = 1. * w, 1. * h

            # Ground-truth row: centre, score, 8-value alpha encoding, depth,
            # dimensions, optionally box size, and the class id last.
            det_row = ([ct[0], ct[1], 1]
                       + self._alpha_to_8(self._convert_alpha(ann['alpha']))
                       + [ann['depth']]
                       + (np.array(ann['dim']) / 1).tolist())
            if self.opt.reg_bbox:
                det_row += [w, h]
            det_row.append(cls_id)
            gt_det.append(det_row)

            # Two overlapping orientation bins; rotres stores the offset from
            # each bin's reference angle (-pi/2 and +pi/2 respectively).
            alpha = self._convert_alpha(ann['alpha'])
            if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
                rotbin[k, 0] = 1
                rotres[k, 0] = alpha + 0.5 * np.pi
            if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
                rotbin[k, 1] = 1
                rotres[k, 1] = alpha - 0.5 * np.pi
            dep[k] = ann['depth']
            dim[k] = ann['dim']
            ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
            reg[k] = ct - ct_int
            # reg_mask is left at 0 for augmented samples.
            reg_mask[k] = 0 if augmented else 1
            rot_mask[k] = 1

        ret = {
            'input': inp,
            'hm': hm,
            'dep': dep,
            'dim': dim,
            'ind': ind,
            'rotbin': rotbin,
            'rotres': rotres,
            'reg_mask': reg_mask,
            'rot_mask': rot_mask
        }
        if self.opt.reg_bbox:
            ret['wh'] = wh
        if self.opt.reg_offset:
            ret['reg'] = reg
        if self.opt.debug > 0 or 'train' not in self.split:
            if gt_det:
                gt_det = np.array(gt_det, dtype=np.float32)
            else:
                gt_det = np.zeros((1, 18), dtype=np.float32)
            ret['meta'] = {
                'c': center,
                's': scale,
                'gt_det': gt_det,
                'calib': calib,
                'image_path': image_path,
                'img_id': image_id
            }

        return ret
Example #30
0
    def __getitem__(self, index):
        """Load one image with YOLO-style labels and build detection targets.

        ``self.images[index]`` is passed directly to ``cv2.imread`` and
        ``self.anno[index]`` to ``np.loadtxt``; each label row is reshaped to
        five values (class, centre x, centre y, width, height) and converted
        to pixel corner coordinates. For the 'train' split, optional shear,
        scale jitter, horizontal / vertical flips and rotation are applied.

        Args:
            index: Position of the sample in ``self.images`` / ``self.anno``.

        Returns:
            For opt.task == 'fcos': dict with 'input', 'target', 'mask'.
            Otherwise: dict with 'input', 'hm', 'reg_mask', 'ind' and one of
            'wh' / ('dense_wh', 'dense_wh_mask') / ('cat_spec_wh',
            'cat_spec_mask'); plus 'reg' when opt.reg_offset, 'obj' when
            opt.reg_obj, and 'meta' when opt.debug > 0 or the split is not
            'train'.
        """
        img_id = self.images[index]
        image = cv2.imread(img_id)
        height, width = image.shape[:2]

        # Convert the YOLO labels to pixel corners (x1, y1, x2, y2).
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            rows = np.loadtxt(self.anno[index]).reshape(-1, 5)
        if rows.size:
            # Copy the centres: their columns are overwritten before the
            # second corner is computed.
            cx, cy = rows[:, 1].copy(), rows[:, 2].copy()
            half_w, half_h = rows[:, 3] / 2, rows[:, 4] / 2
            rows[:, 1] = width * (cx - half_w)
            rows[:, 2] = height * (cy - half_h)
            rows[:, 3] = width * (cx + half_w)
            rows[:, 4] = height * (cy + half_h)
        num_objs = min(len(rows), self.max_objs)

        # Transform parameters.
        center = np.array([width / 2., height / 2.], dtype=np.float32)
        scale = max(height, width) * 1.0
        rotation = 0
        shear = 0
        input_h, input_w = self.opt.input_h, self.opt.input_w

        hflipped = vflipped = False
        if self.split == 'train':
            if self.shear:
                shear = np.clip(np.random.randn() * self.shear, -self.shear,
                                self.shear)
            if shear:
                # A negative shear is done as mirror -> positive shear ->
                # mirror back, for both the image and the box x-coordinates.
                mirrored = shear < 0
                if mirrored:
                    image = image[:, ::-1, :]
                    rows[:, [1, 3]] = width - rows[:, [3, 1]] - 1

                shear_mat = np.array([[1, abs(shear), 0], [0, 1, 0]])
                sheared_w = width + abs(shear * height)
                rows[:, [1, 3]] += (rows[:, [2, 4]] * abs(shear)).astype(int)
                image = cv2.warpAffine(image, shear_mat,
                                       (int(sheared_w), height))

                if mirrored:
                    image = image[:, ::-1, :]
                    rows[:, [1, 3]] = sheared_w - rows[:, [3, 1]] - 1
                center[0] = sheared_w / 2.
                scale = max(sheared_w, scale)
                width = sheared_w

            scale_jit = self.scale
            scale = scale * np.clip(np.random.randn() * scale_jit + 1,
                                    1 - scale_jit, 1 + scale_jit)

            if self.hflip and np.random.random() < self.hflip:
                hflipped = True
                image = image[:, ::-1, :]
                center[0] = width - center[0] - 1
            if self.vflip and np.random.random() < self.vflip:
                vflipped = True
                image = image[::-1, :, :]
                center[1] = height - center[1] - 1
            # Rotation angle for the affine transform.
            if self.rotation:
                rotation = np.clip(np.random.randn() * self.rotation,
                                   -self.rotation, self.rotation)

        trans_input = get_affine_transform(center, scale, rotation,
                                           [input_w, input_h])
        warped = cv2.warpAffine(image,
                                trans_input, (input_w, input_h),
                                flags=cv2.INTER_LINEAR)
        inp = ((warped.astype(np.float32) / 255.) - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        down_ratio = self.opt.down_ratio
        output_h = input_h // down_ratio
        output_w = input_w // down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(center, scale, rotation,
                                            [output_w, output_h])

        max_objs = self.max_objs
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        obj = np.zeros((output_h, output_w), dtype=np.float32)
        wh = np.zeros((max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((max_objs, 2), dtype=np.float32)
        ind = np.zeros(max_objs, dtype=np.int64)
        reg_mask = np.zeros(max_objs, dtype=np.uint8)
        cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

        target = np.zeros((max_objs, 5), dtype=np.float32)

        if self.opt.mse_loss:
            draw_gaussian = draw_msra_gaussian
        else:
            draw_gaussian = draw_umich_gaussian

        gt_det = []
        fcos_task = self.opt.task in ['fcos']
        if fcos_task:
            # This task keeps its boxes at input resolution.
            trans_output = trans_input
            output_w, output_h = input_w, input_h

        for k, row in enumerate(rows[:num_objs]):
            cls_id = int(row[0])
            bbox = row[1:]  # view: edits below write through to `rows`
            if hflipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            if vflipped:
                bbox[[1, 3]] = height - bbox[[3, 1]] - 1

            # Transform all four corners and take their axis-aligned hull.
            corners = [
                affine_transform(bbox[:2], trans_output),  # left-top
                affine_transform(bbox[2:], trans_output),  # right-bottom
                affine_transform(bbox[[2, 1]], trans_output),  # right-top
                affine_transform(bbox[[0, 3]], trans_output),  # left-bottom
            ]
            bbox[:2] = np.min(corners, axis=0)
            bbox[2:] = np.max(corners, axis=0)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            visible = h > 0 and w > 0

            if fcos_task:
                target[k] = cls_id, bbox[0], bbox[1], bbox[2], bbox[3]
                if visible:
                    reg_mask[k] = 1
                continue
            if not visible:
                continue

            obj[int(bbox[1]):int(bbox[3]) + 1,
                int(bbox[0]):int(bbox[2]) + 1] = 1
            radius = max(0, int(gaussian_radius((math.ceil(h),
                                                 math.ceil(w)))))
            if self.opt.mse_loss:
                radius = 2 * radius / 3
            ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                          dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius)
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1
            # Only the two columns belonging to this class are filled in.
            cls_cols = slice(cls_id * 2, cls_id * 2 + 2)
            cat_spec_wh[k, cls_cols] = wh[k]
            cat_spec_mask[k, cls_cols] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                               radius)
            gt_det.append([
                ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                1, cls_id
            ])

        if fcos_task:
            return {'input': inp, 'target': target, 'mask': reg_mask}

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind}
        # Exactly one size representation is returned.
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            ret['dense_wh'] = dense_wh
            ret['dense_wh_mask'] = np.concatenate([hm_a, hm_a], axis=0)
        elif self.opt.cat_spec_wh:
            ret['cat_spec_wh'] = cat_spec_wh
            ret['cat_spec_mask'] = cat_spec_mask
        else:
            ret['wh'] = wh
        if self.opt.reg_offset:
            ret['reg'] = reg
        if self.opt.reg_obj:
            ret['obj'] = obj[np.newaxis]
        if self.opt.debug > 0 or self.split != 'train':
            if gt_det:
                gt_det = np.array(gt_det, dtype=np.float32)
            else:
                gt_det = np.zeros((1, 6), dtype=np.float32)
            ret['meta'] = {
                'c': center,
                's': scale,
                'gt_det': gt_det,
                'img_id': img_id
            }
        return ret