def __getitem__(self, index):
    def _coco_box_to_bbox(box):
      # convert a COCO [x, y, w, h] box to [x1, y1, x2, y2]
      bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                      dtype=np.float32)
      return bbox

    def _get_border(border, size):
      # shrink the border until [border, size - border) is a valid range for sampling the crop center
      i = 1
      while size - border // i <= border // i:
          i *= 2
      return border // i

    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id], iscrowd=0)  # remove crowd annotations
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) # randomly choose a scale factor in [0.6, 1.4)
        w_border = _get_border(128, img.shape[1])
        h_border = _get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift
        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1

    # affine transform that crops/resizes the image to the network input size (input_w x input_h)
    trans_input = get_affine_transform(
      c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input,
                         (input_w, input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)
    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
    inp = (inp - self.mean) / self.std # normalize with the dataset mean and std
    inp = inp.transpose(2, 0, 1)
    # output feature map
    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) # maps original-image coords to the output feature map

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) # one center heatmap per class
    wh = np.zeros((self.max_objs, 2), dtype=np.float32) # box width/height target per object
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
        ann = anns[k]
        bbox = _coco_box_to_bbox(ann['bbox'])
        cls_id = int(self.cat_ids[ann['category_id']])
        if flipped:
          bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        bbox[:2] = affine_transform(bbox[:2], trans_output) # map bbox corners into output-map coords
        bbox[2:] = affine_transform(bbox[2:], trans_output)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) # clip to the output map; objects extending outside get truncated
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
        if h > 0 and w > 0: # skip boxes that collapse to zero size after clipping (object lies outside the output map)
            # heat map
            radius = gaussian_radius((math.ceil(h), math.ceil(w)))
            radius = max(0, int(radius))
            radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
                [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            draw_gaussian(hm[cls_id], ct_int, radius) # splat a Gaussian label onto the class heatmap

            wh[k] = 1. * w, 1. * h # width and height of bbox
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int # sub-pixel offset: floating-point center minus its integer location
            reg_mask[k] = 1
            cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
            cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
            if self.opt.dense_wh:
                draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) # bbox of ground truth

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta # ground truth
    return ret
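For reference, a minimal decoding sketch (not from the original repository; `decode_boxes` and the `down_ratio=4` default are illustrative assumptions) showing how the `ind`, `reg` and `wh` targets built above map back to boxes in input-image coordinates:

import numpy as np

def decode_boxes(ind, reg, wh, reg_mask, output_w, down_ratio=4):
    # ind[k] was stored as ct_int[1] * output_w + ct_int[0], so unflatten it first
    xs = (ind % output_w).astype(np.float32) + reg[:, 0]   # add the sub-pixel offset back
    ys = (ind // output_w).astype(np.float32) + reg[:, 1]
    boxes = np.stack([xs - wh[:, 0] / 2., ys - wh[:, 1] / 2.,
                      xs + wh[:, 0] / 2., ys + wh[:, 1] / 2.], axis=1)
    # keep only the slots marked valid and scale from the output map back to the input image
    return boxes[reg_mask.astype(bool)] * down_ratio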
Example #2
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)

        # """
        HM_ATT = False
        PYFLOW = True
        ONE_CLASS_ONLY = True

        if not HM_ATT:
            if PYFLOW:
                if 'uav' in self.opt.dataset:
                    seg_path = os.path.join(
                        '/store/datasets/UAV/bgsubs',
                        os.path.dirname(file_name).split('/')[-1],
                        os.path.basename(file_name).replace('jpg', 'png'))
                else:
                    seg_path = os.path.join(
                        '/store/datasets/OlderUA-Detrac/pyflow-bgsubs',
                        os.path.dirname(file_name).split('/')[-1],
                        os.path.basename(file_name).replace('jpg', 'png'))
        # """

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        channel_counter = len(self.coco.getCatIds())
        if not HM_ATT:
            bboxes = {}
            for ann in anns:
                if str(ann['category_id']) in bboxes:
                    bboxes[str(ann['category_id'])].append([
                        int(ann['bbox'][0]),
                        int(ann['bbox'][1]),
                        int(ann['bbox'][0] + ann['bbox'][2]),
                        int(ann['bbox'][1] + ann['bbox'][3])
                    ])
                else:
                    bboxes[str(ann['category_id'])] = [[
                        int(ann['bbox'][0]),
                        int(ann['bbox'][1]),
                        int(ann['bbox'][0] + ann['bbox'][2]),
                        int(ann['bbox'][1] + ann['bbox'][3])
                    ]]
        # for ann in anns:
        #    bboxes.append([int(ann['bbox'][0]),
        #                  int(ann['bbox'][1]),
        #                 int(ann['bbox'][0] + ann['bbox'][2]),
        #                int(ann['bbox'][1] + ann['bbox'][3])])
        num_objs = min(len(anns), self.max_objs)
        # print(img_path)
        img = cv2.imread(img_path)
        if not HM_ATT:
            if PYFLOW:
                seg_img = cv2.imread(seg_path, 0)  # hughes

            if not PYFLOW:
                if 'coco' in img_path:
                    if 'val' in img_path:
                        seg_dir = '/store/datasets/coco/annotations/stuff_val2017_pixelmaps'
                    else:
                        seg_dir = '/store/datasets/coco/annotations/stuff_train2017_pixelmaps'
                    stuff_img = cv2.imread(
                        os.path.join(seg_dir,
                                     file_name.replace('.jpg', '.png')))
                    seg_img = np.zeros([img.shape[0], img.shape[1]])
                    seg_img[stuff_img[:, :, 0] == 0] += 1
                    seg_img[stuff_img[:, :, 1] == 214] += 1
                    seg_img[stuff_img[:, :, 2] == 255] += 1
                    seg_img[seg_img == 3] = 255
                    seg_img[seg_img < 255] = 0
                else:
                    if not ONE_CLASS_ONLY:
                        seg_img = np.zeros(
                            [channel_counter, img.shape[0], img.shape[1]])
                        for label in range(1, channel_counter + 1):
                            if str(label) in bboxes:
                                for bbox in bboxes[str(label)]:
                                    seg_img[label - 1, bbox[1]:bbox[3],
                                            bbox[0]:bbox[2]] = 255
                    else:
                        seg_img = np.zeros([img.shape[0], img.shape[1]])
                        for label in range(1, channel_counter + 1):
                            if str(label) in bboxes:
                                for bbox in bboxes[str(label)]:
                                    seg_img[bbox[1]:bbox[3],
                                            bbox[0]:bbox[2]] = 255

        # seg_img = np.zeros([img.shape[0], img.shape[1]])
        # for bbox in bboxes:
        #   seg_img[bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name.replace('.jpg', '_rgb.jpg'))), seg_img_rgb)
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name)), seg_img)
        # exit()
        # print("IMG_SHAPE: ", img.shape, " MEAN: ", np.mean(img))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", os.path.basename(file_name)), img)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                if not HM_ATT:
                    if ONE_CLASS_ONLY:
                        seg_img = seg_img[:, ::-1]
                    else:
                        seg_img = seg_img[:, ::-1, :]
                # print('img.shape: ', img.shape)
                # print('seg_img.shape: ', seg_img.shape)
                # exit()
                # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), img)
                # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), seg_img)
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # print('TRANS INPUT SHAPE: ', trans_input.shape)
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        if not HM_ATT:
            if ONE_CLASS_ONLY:
                seg_inp = cv2.warpAffine(seg_img,
                                         trans_input, (input_w, input_h),
                                         flags=cv2.INTER_LINEAR)
            else:
                seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))  # (channels, H, W) to match the warpAffine output
                for channel in range(seg_img.shape[0]):
                    seg_inp[channel, :, :] = cv2.warpAffine(
                        seg_img[channel, :, :],
                        trans_input, (input_w, input_h),
                        flags=cv2.INTER_LINEAR)

        inp = (inp.astype(np.float32) / 255.)
        if not HM_ATT:
            seg_inp = (seg_inp.astype(np.float32) / 255.)  # hughes
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), inp)
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), seg_inp)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # print('MEAN: ', np.average(seg_inp))

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        # choose the Gaussian splatting function (elliptical when requested)
        if self.opt.elliptical_gt:
            draw_gaussian = draw_ellipse_gaussian
        else:
            draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if self.opt.elliptical_gt:
                    radius_x = radius if h > w else int(radius * (w / h))
                    radius_y = radius if w >= h else int(radius * (h / w))
                    # radius_x = radius if w > h else int(radius / (w/h))
                    # radius_y = radius if h >= w else int(radius / (h/w))
                    draw_gaussian(hm[cls_id], ct_int, radius_x, radius_y)
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if not HM_ATT:
            if ONE_CLASS_ONLY:
                # scale_percent = 25  # percent of original size
                # width = int(seg_inp.shape[1] * scale_percent / 100)
                # height = int(seg_inp.shape[0] * scale_percent / 100)
                # dim = (width, height)
                # seg_inp = cv2.resize(seg_inp, dim, interpolation=cv2.INTER_AREA)
                seg_inp = np.expand_dims(seg_inp, 0)
        # print(seg_inp.shape)
        # print(hm.shape)
        # print(inp.shape)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'seg': seg_inp,
            'ct_att': hm
        }  # 'seg': seg_inp}  # 'seg': np.expand_dims(seg_inp, 0)}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta

        # ret['seg'] = ret['hm']
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)), (inp.transpose(1, 2, 0)* 255).astype(np.uint8))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), (seg_inp.squeeze(0) * 255).astype(np.uint8))
        # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/test_images/hm/", "hm_" + os.path.basename(file_name)), (hm.squeeze(0) * 255).astype(np.uint8))

        return ret
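When no background-subtraction image is available, the ONE_CLASS_ONLY branch above paints the ground-truth boxes into a single binary mask. A standalone sketch of that step (the name `boxes_to_seg` is mine, not from the repository):

import numpy as np

def boxes_to_seg(bboxes, height, width):
    # bboxes: iterable of [x1, y1, x2, y2] in pixel coordinates
    seg = np.zeros((height, width), dtype=np.uint8)
    for x1, y1, x2, y2 in bboxes:
        seg[int(y1):int(y2), int(x1):int(x2)] = 255  # foreground inside every box
    return seg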
Example #3
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            print(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
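All of these examples splat center labels with draw_umich_gaussian, which is not defined here. For reference, a sketch of the commonly used CenterNet-style implementation (reproduced from memory, so treat it as a reference sketch rather than the exact helper imported above):

import numpy as np

def gaussian2D(shape, sigma=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0  # zero out negligible values
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    # splat a (2*radius+1)^2 Gaussian at `center`, keeping the element-wise max with the existing heatmap
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)  # in-place max into the heatmap view
    return heatmap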
    def __getitem__(self, index):
        img_id = self.images[index]
        img_path = self.data_dir + f"/JPEGImages/{img_id}.jpg"

        if self.has_landmark == 1:
            anns = self._get_annotation_lm(img_id)
        else:
            anns = self._get_annotation(img_id)

        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std  # normalize with the dataset mean and std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kpts_reg = np.zeros((self.max_objs, 10), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        # kpts_mask: for direct keypoint regression (not heatmap-based prediction)
        kpts_mask = np.zeros((self.max_objs, 10), dtype=np.uint8)

        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            box, landmarks, label = anns[k]
            bbox = np.array(box, dtype=np.float32)
            lm = np.array(landmarks, dtype=np.float32)
            cls_id = int(self.cat_ids[label])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                # only regress landmarks when the annotation actually provides them
                flag_lm = lm[:10].sum()
                if flag_lm > 1:
                    for idx in range(0, 10, 2):
                        lm[idx:idx + 2] = affine_transform(
                            lm[idx:idx + 2], trans_output)
                        if lm[idx] >= 0 and lm[idx] < output_w and \
                                lm[idx + 1] >= 0 and lm[idx + 1]<output_h:
                            kpts_mask[k, idx:idx + 2] = 1
                            kpts_reg[k][idx] = (lm[idx] - ct_int[0])
                            kpts_reg[k][idx + 1] = (lm[idx + 1] - ct_int[1])

                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.kpts_reg:  # add the keypoint regression targets
            ret.update({'kpts_reg': kpts_reg})
            ret.update({'kpts_mask': kpts_mask})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
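A minimal sketch (the helper name and shapes are my assumptions) of how the kpts_reg offsets above, stored relative to the integer box center ct_int, decode back to absolute landmark positions on the output feature map:

import numpy as np

def decode_landmarks(ind, kpts_reg, kpts_mask, output_w):
    # recover the integer centers that kpts_reg was measured against
    cx = (ind % output_w).astype(np.float32)
    cy = (ind // output_w).astype(np.float32)
    lm = kpts_reg.reshape(-1, 5, 2).copy()      # 5 landmarks, (x, y) each
    lm[:, :, 0] += cx[:, None]
    lm[:, :, 1] += cy[:, None]
    valid = kpts_mask.reshape(-1, 5, 2)[:, :, 0].astype(bool)
    return lm, valid                            # positions on the output map plus a validity mask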
Example #5
    def __getitem__(self, index):
        mosaic_pro = random.random()
        if mosaic_pro > 0:  # threshold of 0: mosaic augmentation is effectively always applied
            img_id = self.images[index]
            img, labels = self.load_mosaic(index)
            all_ann = []
            for da_label in labels:
                da_label = da_label.tolist()
                for da_l in da_label:
                    all_ann.append(da_l)
            num_objs = min(len(all_ann), self.max_objs)
        else:
            positive_aug = random.random()
            if positive_aug > 2:  # threshold of 2 can never be exceeded, so this copy-paste augmentation is disabled
                index1 = random.randint(0, self.num_samples - 1)
                # chartlet_dir = "/home/raid5/daming/HandDataMix/TrainImg/AnnImgMix"
                img_id = self.images[index]
                img_id1 = self.images[index1]

                file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
                file_name1 = self.coco.loadImgs(ids=[img_id1])[0]['file_name']

                path_num = random.random()
                img_path = os.path.join(self.img_dir, file_name)
                img_path1 = os.path.join(self.img_dir, file_name1)
                # if path_num > 0.5:
                #   img_path = os.path.join(chartlet_dir, file_name)

                ann_ids = self.coco.getAnnIds(imgIds=[img_id])
                ann_ids1 = self.coco.getAnnIds(imgIds=[img_id1])

                anns = self.coco.loadAnns(ids=ann_ids)
                anns1 = self.coco.loadAnns(ids=ann_ids1)

                img = cv2.imread(img_path)
                img1 = cv2.imread(img_path1)
                hand_num = len(anns1)
                if hand_num > 0:
                    for ann1 in anns1:
                        ran_id = random.randint(0, 26000)
                        hand_x = ann1['bbox'][0]
                        hand_y = ann1['bbox'][1]
                        hand_w = ann1['bbox'][2]
                        hand_h = ann1['bbox'][3]
                        temp = img1[hand_y:hand_y + hand_h,
                                    hand_x:hand_x + hand_w]
                        temp_h, temp_w, c = temp.shape
                        src_h, src_w, src_c = img.shape
                        for n in range(100):
                            min_src = min(src_w, src_h)
                            max_temp = max(temp_h, temp_w)
                            if (max_temp > 0.5 * min_src):
                                break
                            if (src_w < temp_w or src_h < temp_h):
                                break
                            x_tmp = random.randint(0, src_w - temp_w)
                            y_tmp = random.randint(0, src_h - temp_h)
                            src_rect = [
                                x_tmp, y_tmp, x_tmp + temp_w, y_tmp + temp_h
                            ]
                            iou_all = 0
                            for gt in anns:
                                gt = [
                                    gt['bbox'][0], gt['bbox'][1],
                                    gt['bbox'][0] + gt['bbox'][2],
                                    gt['bbox'][1] + gt['bbox'][3]
                                ]
                                iou = self.compute_iou(gt, src_rect)
                                iou_all = iou_all + iou
                                # print(iou_all)
                                if iou_all == 0:
                                    img[y_tmp:y_tmp + temp_h,
                                        x_tmp:x_tmp + temp_w] = temp
                                    a = {
                                        'bbox': [x_tmp, y_tmp, temp_w, temp_h],
                                        'category_id': 1
                                    }
                                    anns.append(a)
                                    break
                    num_objs = min(len(anns), self.max_objs)
            else:
                img_id = self.images[index]
                file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
                # daming_dir = "/home/raid5/daming/HandDataMix/TrainImg/AnnImgMix"
                img_path = os.path.join(self.img_dir, file_name)
                # img_path1 = os.path.join(daming_dir, file_name)
                ann_ids = self.coco.getAnnIds(imgIds=[img_id])
                anns = self.coco.loadAnns(ids=ann_ids)
                num_objs = min(len(anns), self.max_objs)
                img = cv2.imread(img_path)
                # daming_num = random.random()
                # if daming_num > 0.5:
                #   img = cv2.imread(img_path)
                # else:
                #   img = cv2.imread(img_path1)

        gray_pro = random.random()
        if gray_pro > 2:  # never true: the grayscale augmentation below is disabled
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                # s = s * np.random.choice(np.arange(0.3, 1.2, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        iaa_pro = random.random()
        if iaa_pro > 2:  # never true: the imgaug color augmentation below is disabled
            aug_seq = iaa.Sequential(
                [iaa.MultiplyHueAndSaturation((0.5, 1.5), per_channel=True)])
            #   aug_seq = iaa.Sequential([
            #     iaa.Sometimes(
            #         0.5,
            #         iaa.GaussianBlur(sigma=(0, 0.5))
            #     ),
            #     iaa.LinearContrast((0.75, 1.5)),
            #     iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
            #     iaa.Multiply((0.8, 1.2), per_channel=0.2),
            # ], random_order=True)
            inp, _ = aug_seq(image=inp, bounding_boxes=None)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        # ind is the flattened center index on the output feature map; reg is the sub-pixel offset of the center
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            if mosaic_pro > 0:
                ann = all_ann[k]
                bbox = np.array([
                    float(ann[0]),
                    float(ann[1]),
                    float(ann[2]),
                    float(ann[3])
                ],
                                dtype=np.float32)
            else:
                ann = anns[k]
                bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = 0
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                # print("- h : ", h," - w : ", w)
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta

        return ret
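The copy-paste branch above relies on self.compute_iou, which is not shown. A hypothetical sketch of such a helper for [x1, y1, x2, y2] boxes (the real method may differ):

def compute_iou(box_a, box_b):
    # intersection over union of two axis-aligned boxes given as [x1, y1, x2, y2]
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter + 1e-6)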
    def __getitem__(self, index):
        img_id = self.images[index]
        # loadImgs(ids=[img_id]) returns a list of length 1
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        cropped = False
        if self.split == 'train':
            if np.random.random() < 1:  # probability 1: the pre-cropped image is always used in training
                cropped = True
                file_name = file_name.split('.')[0] + 'crop.jpg'
                img_path = os.path.join(self.img_dir, file_name)
        if self.split == 'val':
            cropped = True

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        rotted = False

        # input_res is max(input_h, input_w); optionally keep a resolution close to the original image
        if np.random.random() < self.opts.keep_inp_res_prob and max(
            (height | 127) + 1, (width | 127) + 1) < 1024:
            self.opts.input_h = (height | 127) + 1
            self.opts.input_w = (width | 127) + 1
            self.opts.output_h = self.opts.input_h // self.opts.down_ratio
            self.opts.output_w = self.opts.input_w // self.opts.down_ratio
            self.opts.input_res = max(self.opts.input_h, self.opts.input_w)
            self.opts.output_res = max(self.opts.output_h, self.opts.output_w)

        trans_input = get_affine_transform(
            c, s, rot, [self.opts.input_res, self.opts.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opts.input_res, self.opts.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        inp = (inp - self.mean) / self.std
        #change data shape to [3, input_size, input_size]
        inp = inp.transpose(2, 0, 1)

        # output_res is max(output_h, output_w), i.e. the feature-map size after downsampling
        output_res = self.opts.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)

        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, 2 * num_joints), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opts.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            if cropped:
                bbox = np.array(ann['bbox'])
            else:
                bbox = np.array(ann['org_bbox'])
            cls_id = int(ann['category_id']) - 1
            if cropped:
                pts = np.array(ann['keypoints'],
                               np.float32).reshape(num_joints, 3)
            else:
                pts = np.array(ann['org_keypoints'],
                               np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for joint_idx in self.flip_idx:
                    pts[joint_idx[0]], pts[joint_idx[1]] = pts[
                        joint_idx[1]].copy(), pts[
                            joint_idx[0]].copy()  # copy first so the swap does not overwrite values
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            if rotted:
                pts_rot = np.zeros((num_joints, 2))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts_rot[j, :2] = affine_transform(
                            pts[j, :2], trans_output_rot)
                bbox[:2] = np.min(pts_rot, axis=0)
                bbox[2:] = np.max(pts_rot, axis=0)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int  # the error of center[x, y]
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()  # number of visible/annotated joints
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0  # no visible joints: ignore this object in the regression losses

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)),
                                            min_overlap=1)
                hp_radius = self.opts.hm_gauss if self.opts.mse_loss else max(
                    0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:  #means this joint can be seen
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and pts[
                                j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opts.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            hp1 = draw_gaussian(hm_hp[j], pt_int, hp_radius)
                            # plt.imsave('/home/mry/Desktop/testimg/hp_{}_{}.jpg'.format(k, j), hp1)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # gt_det: x0, y0, x1, y1, score(=1), joint1_x, joint1_y, ..., joint17_x, joint17_y, cls_id
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])

        #if rot != 0:
        #    hm = hm * 0 + 0.9999
        #    reg_mask *= 0
        #    kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }

        if self.opts.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opts.reg_offset:
            ret.update({'reg': reg})
        if self.opts.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opts.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opts.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
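A minimal sketch (names are mine) of how the hp_ind / hp_offset / hp_mask targets built above map back to joint coordinates on the output grid:

import numpy as np

def decode_joints(hp_ind, hp_offset, hp_mask, output_res, num_joints):
    # hp_ind stores each joint's integer location as y * output_res + x
    xs = (hp_ind % output_res).astype(np.float32) + hp_offset[:, 0]
    ys = (hp_ind // output_res).astype(np.float32) + hp_offset[:, 1]
    pts = np.stack([xs, ys], axis=1).reshape(-1, num_joints, 2)
    visible = hp_mask.reshape(-1, num_joints).astype(bool)
    return pts, visible   # (max_objs, num_joints, 2) joint positions and a visibility mask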
Example #7
  def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)
    num_objs = min(len(anns), self.max_objs)

    img = cv2.imread(img_path)

    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

      if self.split == 'train':
          input_w = self.patch_sizes[(self.getcount//self.opt.batch_size) % len(self.patch_sizes)]
          input_h = input_w

          self.getcount = 0 if self.getcount == self.num_samples else self.getcount + 1

    flipped = False

    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))

        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])

        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift

        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)

        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True
        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1


    trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = (inp.astype(np.float32) / 255.)

    if self.split == 'train' and not self.opt.no_color_aug:
      color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    inp = (inp - self.mean) / self.std
    inp = inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes

    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []

    allmask = np.zeros((output_h, output_w, self.opt.num_maskclasses+levelnum), dtype=np.uint8)

    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])

      if ann['category_id'] not in self._valid_ids:
        continue

      cls_id = int(self.cat_ids[ann['category_id']])

      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1

      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

      x1 = int(bbox[0])
      y1 = int(bbox[1])
      x2 = int(bbox[2])
      y2 = int(bbox[3])

      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

      if h > 0 and w > 0:
        ### gen mask begin ###
        # clsbase = cls_id*9
        clsbase = 0*9
        mask = self.coco.annToMask(ann)

        if flipped:
          mask = mask[:, ::-1]

        mask = cv2.warpAffine(mask, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR)

        roi = mask[y1:y2, x1:x2]
        roi_h, roi_w = roi.shape

        if roi_h < 6 or roi_w < 6:
          continue

        l = size2level(output_w*output_h, roi_w*roi_h)
        allmask[:,:,self.opt.num_maskclasses+l] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l], mask)
        allmask[:,:,self.opt.num_maskclasses+l+1] = np.bitwise_or(allmask[:,:,self.opt.num_maskclasses+l+1], mask)

        roi_cx = roi_w//2
        roi_cy = roi_h//2
        cell_w = (roi_w+5)//6
        cell_h = (roi_h+5)//6

        allmaskroi = allmask[y1:y2, x1:x2, :]

        ww = max(6,cell_w//4)
        hh = max(6,cell_h//4)

        # TOP
        self.assignroi(0, allmaskroi, roi, 0,                0,                roi_cx-cell_w+ww, roi_cy-cell_h+hh)
        self.assignroi(1, allmaskroi, roi, roi_cx-cell_w-ww, 0,                roi_cx+cell_w+ww, roi_cy-cell_h+hh)
        self.assignroi(2, allmaskroi, roi, roi_cx+cell_w-ww, 0,                roi_w,            roi_cy-cell_h+hh)

        # MIDDLE
        self.assignroi(3, allmaskroi, roi, 0,                roi_cy-cell_h-hh, roi_cx-cell_w+ww, roi_cy+cell_h+hh)
        self.assignroi(4, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy-cell_h-hh, roi_cx+cell_w+ww, roi_cy+cell_h+hh)
        self.assignroi(5, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy-cell_h-hh, roi_w,            roi_cy+cell_h+hh)

        # BOTTOM
        self.assignroi(6, allmaskroi, roi, 0,                roi_cy+cell_h-hh, roi_cx-cell_w+ww, roi_h           )
        self.assignroi(7, allmaskroi, roi, roi_cx-cell_w-ww, roi_cy+cell_h-hh, roi_cx+cell_w+ww, roi_h           )
        self.assignroi(8, allmaskroi, roi, roi_cx+cell_w-ww, roi_cy+cell_h-hh, roi_w,            roi_h           )
        ### gen mask end ###

        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))

        ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)

        if self.opt.mse_loss:
          radius = self.opt.hm_gauss
          draw_gaussian(hm[cls_id], ct_int, radius)
        else:
          #draw_gaussian(hm[cls_id], ct_int, radius)
          xradius = int(gaussian_radius((math.ceil(w),math.ceil(w))))
          yradius = int(gaussian_radius((math.ceil(h),math.ceil(h))))
          draw_elipse_gaussian(hm[cls_id], ct_int, (xradius,yradius))

        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1

        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1

        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

        gt_det.append([ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    #cv2.imwrite("./results/hehe.jpg", (hm.max(axis=0).squeeze()*255).astype(np.uint8))

    if index % 30 == 0:
      cv2.imwrite("./results/top.jpg", (allmask[:,:,0:3]*255).astype(np.uint8))
      cv2.imwrite("./results/middle.jpg", (allmask[:,:,3:6]*255).astype(np.uint8))
      cv2.imwrite("./results/bottom.jpg", (allmask[:,:,6:9]*255).astype(np.uint8))
      cv2.imwrite("./results/full.jpg", (((allmask[:,:,0:3]+allmask[:,:,3:6]+allmask[:,:,6:9]) > 0)*255).astype(np.uint8))
      cv2.imwrite("./results/large.jpg", (((allmask[:,:,9:12]) > 0)*255).astype(np.uint8))
      cv2.imwrite("./results/small.jpg", (((allmask[:,:,12:15]) > 0)*255).astype(np.uint8))

    ret = {
      'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
      'allmask': allmask.astype(np.float32).transpose(2, 0, 1)
    }

    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']

    if self.opt.reg_offset:
      ret.update({'reg': reg})

    #if self.opt.debug > 0 or not self.split == 'train':
    if not self.split == 'train':
      if len(gt_det) > 0:
        gt_det = np.array(gt_det, dtype=np.float32)
      else:
        gt_det = np.zeros((1, 6), dtype=np.float32)

      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta

    # img = cv2.warpAffine(img, trans_output, (output_w, output_h), flags=cv2.INTER_LINEAR)
    # img = img*allmask[:,:,:3]
    # cv2.imwrite("./results/maskit.jpg", img)

    return ret
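Example #7 above sizes its heatmap splats with `gaussian_radius((math.ceil(h), math.ceil(w)))` (and separate x/y radii for its elliptical variant). For reference, a minimal sketch of the radius formula commonly used in CornerNet/CenterNet-style code; the exact helper imported by this dataset may differ, so treat this as an approximation rather than the repository's implementation. It solves three quadratic overlap constraints (both corners shifted inward, both outward, one of each) and keeps the smallest radius, so a box jittered by that amount still overlaps the ground truth by at least `min_overlap`.

import numpy as np

def gaussian_radius(det_size, min_overlap=0.7):
    # det_size = (height, width) of the box on the output feature map
    height, width = det_size

    a1 = 1
    b1 = height + width
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
    r1 = (b1 + sq1) / 2

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
    r2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)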
Example #8
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        # read the corresponding keypoint annotation file based on the image file name
        (filepath, tempfilename) = os.path.split(img_path)
        (filename, extension) = os.path.splitext(tempfilename)
        kps_path = os.path.join(
            '/media/srt/dataset/L_Shelf_0114/Kps_Ann', filename + '_kps.npy'
        )  #/media/srt/resource/Halcon_Project/L_shelf_dataset/HG_Dataset/kps
        kps_raw = np.load(kps_path)
        c3 = np.ones(6)
        kps_ann = np.column_stack((kps_raw, c3))  # make the keypoints shape [6, 3]

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0  # scale used for crop / shift
        input_h, input_w = self.opt.input_h, self.opt.input_w  # input resolution defined in opt
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale  #0
                cf = self.opt.shift  #0
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # add the random rotation from multi-pose
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            # if np.random.random() < self.opt.flip:
            #     flipped = True
            #     img = img[:, ::-1, :]
            #     c[0] = width - c[0] - 1

        # apply the affine transform to the input
        trans_input = get_affine_transform(c, s, rot, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        test_image = inp[1]  # kept for visualizing together with kps_hp

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        num_kps = 6  # does the number of points need +1?

        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w),
                      dtype=np.float32)  # heatmap for object centers
        hm_hp = np.zeros((num_kps, output_h, output_w),
                         dtype=np.float32)  # heatmap for keypoints
        # only initialized here; filled in below
        dense_kps = np.zeros((num_kps, 2, output_h, output_w),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_kps, output_h, output_w),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        kps = np.zeros((num_kps, num_kps * 2),
                       dtype=np.float32)  # vectors from the other keypoints to each keypoint
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_kps * 2), dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_kps, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_kps), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_kps), dtype=np.int64)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian
        # collect the targets from the annotations
        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            # how pts is read in can be defined as needed
            pts = np.array(kps_ann,
                           np.float32).reshape(num_kps,
                                               3)  # originally read from the COCO-style json annotations

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                    if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_kps):
                    if pts[j, 2] > 0:
                        # if the keypoint's third (visibility) value > 0, transform it
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output)  # transform the keypoint
                        if pts[j, 0] >= 0 and pts[j, 0] < output_w and \
                                pts[j, 1] >= 0 and pts[j, 1] < output_h:
                            # vectors from all keypoints to this keypoint, flattened into row j
                            # (the original slice kps[j, j*2:j*2+2] could not hold a (num_kps, 2) array)
                            kps[j] = (pts[:, :2] - pts[j, :2]).reshape(num_kps * 2)
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_kps + j] = pts[j, :2] - pt_int
                            hp_mask[k * num_kps + j] = 1
                            if self.opt.dense_hp:
                                # must be drawn before the center heatmap gaussian
                                print('draw dense hp!!!')
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                            heatmap = np.squeeze(hm_hp[j])  #(1,160,240)
                            heatmap = cv2.resize(heatmap, (960, 640),
                                                 interpolation=cv2.INTER_CUBIC)
                            new_image = test_image + heatmap * 2
                            array_name = 'forbidden_s_c_kps_hp/visual_kps_' + str(
                                index) + '_' + str(j) + '.png'
                            # matplotlib.image.imsave(array_name, new_image)
                # draw the gaussian for the center point
                draw_gaussian(hm[cls_id], ct_int, radius)
                heatmap = np.squeeze(hm[cls_id])  # (1,160,240)
                heatmap = cv2.resize(heatmap, (960, 640),
                                     interpolation=cv2.INTER_CUBIC)
                new_image = test_image + heatmap * 2
                array_name = 'visual_center_' + str(index) + '.png'
                # matplotlib.image.imsave(array_name, new_image)

                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_kps * 2).tolist() + [cls_id])
                # gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                #                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
        # 'hps' and 'hps_mask' are added on top of the original ret dict
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_kps * 2, output_h, output_w)
            dense_kps_mask = dense_kps_mask.reshape(num_kps, 1, output_h,
                                                    output_w)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_kps * 2, output_h,
                                                    output_w)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
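Example #8 above (like the others) encodes each object's center as a flat index, `ind[k] = ct_int[1] * output_w + ct_int[0]`, together with `reg_mask`; the keypoint offsets use the same trick with `hp_offset`. As a hedged sketch of how such indices are typically consumed on the loss side in CenterNet-style PyTorch code (the helper names `_gather_feat` / `_transpose_and_gather_feat` follow the common convention and are assumptions here, not taken from this listing):

import torch

def _gather_feat(feat, ind):
    # feat: (B, H*W, C); ind: (B, K) flat center indices -> (B, K, C)
    dim = feat.size(2)
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
    return feat.gather(1, ind)

def _transpose_and_gather_feat(feat, ind):
    # feat: (B, C, H, W) head output -> the C-dim values at the K annotated centers
    feat = feat.permute(0, 2, 3, 1).contiguous()
    feat = feat.view(feat.size(0), -1, feat.size(3))
    return _gather_feat(feat, ind)

The `reg_mask` produced by the dataset then zeros out the slots that do not correspond to a real object before the regression loss is averaged.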
Example #9
    def __getitem__(self, index):
        image_fn = self.flist[index]
        image = cv2.imread(image_fn)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        
        box_fn = str(Path(self.box_root) / (Path(image_fn).stem + '.txt'))

        if osp.exists(box_fn):
            xywh = np.loadtxt(box_fn)
            xx, yy, ww, hh = xywh
            x1, y1, x2, y2 = xx - ww / 2, yy - hh / 2, xx + ww / 2, yy + hh / 2
            boxes = np.array([[x1, y1, x2, y2]]).astype('float32')
        else:
            boxes = np.array([[0.0, 0.0, 1.0, 1.0]]).astype('float32')

        if self.transform:
            image, boxes = self.transform(image, boxes)

        
        # generate box ground truth for the loss
        # box is x1, y1, x2, y2 in [0, 1]
        output_h, output_w, grid_wh = self.configs.hh, self.configs.ww, self.configs.grid_wh
        hin, win = self.configs.image_size
        
        hm = np.zeros((self.configs.num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        dense_xy = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.configs.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.configs.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.configs.max_objs), dtype=np.uint8)


        
        num_objs = min(boxes.shape[0], self.configs.max_objs)
        
        
#        gt_det = []
        for k in range(num_objs):
          bbox = boxes[k]
          h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
          if h > 0 and w > 0:
            radius = gaussian_radius((math.ceil(h*grid_wh), math.ceil(w*grid_wh)))
            radius = max(0, int(radius))
            #radius = self.opt.hm_gauss if self.opt.mse_loss else radius
            ct = np.array(
              [(bbox[0] + bbox[2]) / 2.0 * grid_wh, (bbox[1] + bbox[3]) / 2.0* grid_wh], dtype=np.float32)
            ct_int = ct.astype(np.int32)
            ct_int = np.clip(ct_int, 0, grid_wh-1)
            
            draw_umich_gaussian(hm[k], ct_int, radius)
            
            wh[k] = 1. * w, 1. * h
            ind[k] = ct_int[1] * output_w + ct_int[0]
            reg[k] = ct - ct_int
            reg_mask[k] = 1

            
            draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
            draw_dense_reg(dense_xy, hm.max(axis=0), ct_int, reg[k], radius)
            
#            gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 
#                           ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
#        
        #ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
        #if self.opt.dense_wh:
        hm_a = hm.max(axis=0, keepdims=True)
        dense_mask = np.concatenate([hm_a, hm_a], axis=0)
        
        ret = {'hm': hm, 'wh': wh, 'xy': reg, 'ind': ind, 'dense_xy': dense_xy,
               'dense_wh': dense_wh, 'dense_mask': dense_mask, 'boxes': boxes}
        
        #ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        #del ret['wh']
        #elif self.opt.cat_spec_wh:
          #ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
          #del ret['wh']
        #if self.opt.reg_offset:
          #ret.update({'reg': reg})
#        if self.opt.debug > 0 or not self.split == 'train':
#          gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
#                   np.zeros((1, 6), dtype=np.float32)
#          meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
#          ret['meta'] = meta
#        return ret        
#        
        
        return image, ret
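Example #9 above calls `draw_umich_gaussian(hm[k], ct_int, radius)` and `draw_dense_reg` directly. A minimal sketch of what a `draw_umich_gaussian`-style splat typically does, assuming the usual CenterNet-style helper (the implementation actually imported by these datasets may differ): build a small gaussian kernel and write it into the heatmap with an element-wise maximum so overlapping objects do not erase each other.

import numpy as np

def gaussian2D(shape, sigma=1):
    # dense (2m+1) x (2n+1) gaussian kernel
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    # splat a gaussian of the given radius onto heatmap at center,
    # keeping the element-wise maximum with what is already there
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)

    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]

    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap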
Example #10
    def _get_dota_item(self, index):
        img_id = self.images[index]
        img_info = self.coco.loadImgs(ids=[img_id])[0]
        file_name = img_info['file_name']

        filename = osp.splitext(file_name)[0]
        suffix = osp.splitext(file_name)[1]
        crop_str = list(map(str, img_info['crop']))
        crop_img_path = osp.join('/code/data/DOTA/crop800_80',
                                 '_'.join([filename] + crop_str) + suffix)
        if not osp.isfile(crop_img_path):
            img_path = os.path.join('/media/data/DOTA/trainval/images',
                                    file_name)
            img = cv2.imread(img_path)
            sx, sy, ex, ey = img_info['crop']
            img = img[sy:ey + 1, sx:ex + 1]
            cv2.imwrite(crop_img_path, img)
        else:
            img = cv2.imread(crop_img_path)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        # without a deep copy, modifying anns would also modify the labels stored in self.coco
        anns = copy.deepcopy(self.coco.loadAnns(ids=ann_ids))

        # if True:
        if self.opt.debug:
            segs = [ann['segmentation'][0] for ann in anns]
            cvtools.imwrite(
                cvtools.draw_boxes_texts(img.copy(),
                                         segs,
                                         box_format='polygon'),
                self.opt.debug_dir + '/{}'.format(file_name))

        if self.opt.flip:
            try:
                hv_flip = cvtools.RandomMirror(both=False)
                segs = [ann['segmentation'][0].copy() for ann in anns]
                for i, seg in enumerate(segs):
                    if len(seg) != 8:
                        segm_hull = cv2.convexHull(np.array(seg).reshape(
                            -1, 2).astype(np.float32),
                                                   clockwise=False)
                        xywha = cv2.minAreaRect(segm_hull)
                        segs[i] = cv2.boxPoints(xywha).reshape(-1).tolist()
                img, segs = hv_flip(img, segs)
                # if True:
                if self.opt.debug:
                    cvtools.imwrite(
                        cvtools.draw_boxes_texts(img.copy(),
                                                 segs,
                                                 box_format='polygon'),
                        self.opt.debug_dir + '/flip_{}'.format(file_name))
                for i in range(len(anns)):
                    anns[i]['segmentation'][0] = list(segs[i])
                    bbox = cv2.boundingRect(
                        np.array(segs[i], dtype=np.float32).reshape(-1, 2))
                    anns[i]['bbox'] = list(bbox)
            except Exception as e:
                print(e)
                return []

        if self.opt.rotate:
            rotate = cvtools.RandomRotate()
            segs = [ann['segmentation'][0].copy() for ann in anns]
            for i, seg in enumerate(segs):
                if len(seg) != 8:
                    segm_hull = cv2.convexHull(np.array(seg).reshape(
                        -1, 2).astype(np.float32),
                                               clockwise=False)
                    xywha = cv2.minAreaRect(segm_hull)
                    segs[i] = cv2.boxPoints(xywha).reshape(-1).tolist()
            img, segs = rotate(img, segs)
            # if True:
            if self.opt.debug:
                cvtools.imwrite(
                    cvtools.draw_boxes_texts(img.copy(),
                                             segs,
                                             box_format='polygon'),
                    self.opt.debug_dir + '/rotate_{}'.format(file_name))
            for i in range(len(anns)):
                anns[i]['segmentation'][0] = list(segs[i])
                bbox = cv2.boundingRect(
                    np.array(segs[i], dtype=np.float32).reshape(-1, 2))
                anns[i]['bbox'] = list(bbox)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        # self.opt.input_h = self.opt.input_w = 32 * random.randint(12, 20)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        # flipped = False
        if 'train' in self.split:
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # if np.random.random() < self.opt.flip:
            #     flipped = True
            #     img = img[:, ::-1, :]
            #     c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        gt_boxes = np.array(
            [cvtools.x1y1wh_to_x1y1x2y2(ann['bbox']) for ann in anns])
        img_box = cvtools.xywh_to_x1y1x2y2(np.array([[c[0], c[1], s, s]]))
        img_box[0, 0::2] = np.clip(img_box[0, 0::2], 0, width - 1)
        img_box[0, 1::2] = np.clip(img_box[0, 1::2], 0, height - 1)
        iofs = cvtools.bbox_overlaps(gt_boxes, img_box, mode='iof')
        ids = np.where(iofs > 0.7)[0]
        if len(ids) == 0: return []
        anns = [anns[ind] for ind in ids]

        # if True:
        if self.opt.debug:
            segs = [ann['segmentation'][0].copy()
                    for ann in anns]  # copy; otherwise these reference the original annotations
            inp_draw = inp.copy()
            for k in range(len(segs)):
                seg = segs[k]
                for i in range(0, len(seg), 2):
                    seg[i:i + 2] = affine_transform(seg[i:i + 2], trans_input)
                    # seg[i] = np.clip(seg[i], 0, input_w - 1)
                    # seg[i + 1] = np.clip(seg[i + 1], 0, input_h - 1)
                segm_hull = cv2.convexHull(np.array(seg).reshape(-1, 2).astype(
                    np.float32),
                                           clockwise=False)
                xy, _, _ = cv2.minAreaRect(segm_hull)
                cv2.circle(inp_draw, (int(xy[0]), int(xy[1])),
                           radius=5,
                           color=(0, 0, 255),
                           thickness=-1)
            cvtools.imwrite(
                cvtools.draw_boxes_texts(inp_draw,
                                         segs,
                                         draw_start=False,
                                         box_format='polygon'),
                osp.join(self.opt.debug_dir, 'trans_' + file_name))

        inp = (inp.astype(np.float32) / 255.)
        if 'train' in self.split and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        rets = []
        # out_size = []
        # for down_ratio in down_ratios:
        #     output_h = input_h // down_ratio
        #     output_w = input_w // down_ratio
        #     num_classes = self.num_classes
        #     out_size.append([output_w, output_h])
        #     # trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        #
        #     hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        #     wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        #     if self.opt.a_method == 2:
        #         angle = np.full((self.max_objs, 1), 0.5, dtype=np.float32)
        #     else:
        #         angle = np.zeros((self.max_objs, 1), dtype=np.float32)
        #     dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        #     reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        #     ind = np.zeros((self.max_objs), dtype=np.int64)
        #     reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        #     cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        #     cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)
        #
        #     # for k in range(num_objs):
        #     #     cls_id = int(self.cat_ids[anns[k]['category_id']])
        #     #     draw_heatmap(hm[cls_id], osp.join(self.opt.debug_dir, 'heatmap_' + str(cls_id) + '_' + file_name))
        #     ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'a': angle}
        #     if self.opt.dense_wh:
        #         hm_a = hm.max(axis=0, keepdims=True)
        #         dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
        #         ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
        #         del ret['wh']
        #     elif self.opt.cat_spec_wh:
        #         ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
        #         del ret['wh']
        #     if self.opt.reg_offset:
        #         ret.update({'reg': reg})
        #     # if self.opt.debug > 0 or not self.split == 'train':
        #     #     gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
        #     #         np.zeros((1, 6), dtype=np.float32)
        #     #     meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
        #     #     ret['meta'] = meta
        #     rets.append(ret)
        #
        # draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
        #     draw_umich_gaussian
        # if not self.opt.fpn:
        #     output_w, output_h = out_size[0]
        #     trans_output = get_affine_transform(c, s, 0, out_size[0])
        #
        # for k in range(num_objs):
        #     ann = anns[k]
        #     cls_id = int(self.cat_ids[ann['category_id']])
        #
        #     # decide which FPN level this GT is assigned to
        #     if self.opt.fpn:
        #         bbox = ann['bbox']
        #         fpn_k = int(math.log(224. / math.sqrt(bbox[2] * bbox[3]), 2))
        #         if fpn_k < 0:
        #             fpn_k = 0
        #         if fpn_k > 2:
        #             fpn_k = 2
        #         ret = rets[fpn_k]
        #         output_w, output_h = out_size[fpn_k]
        #         trans_output = get_affine_transform(c, s, 0, out_size[fpn_k])
        #
        #     segm = np.array(ann['segmentation'][0])
        #     # if flipped:
        #     #     for i in range(0, len(segm), 2):
        #     #         segm[i] = width - segm[i] - 1
        #     for i in range(0, len(segm), 2):
        #         segm[i:i + 2] = affine_transform(segm[i:i + 2], trans_output)
        #         segm[i] = np.clip(segm[i], 0, output_w - 1)
        #         segm[i + 1] = np.clip(segm[i + 1], 0, output_h - 1)
        #
        #     segm_hull = cv2.convexHull(segm.reshape(-1, 2).astype(np.float32),
        #                                clockwise=False)
        #     xy, (w, h), a = cv2.minAreaRect(segm_hull)
        #     hm = ret['hm']
        #     reg_mask = ret['reg_mask']
        #     ind = ret['ind']
        #     wh = ret['wh']
        #     angle = ret['a']
        #     if h > 0 and w > 0:
        #         a, w, h = convert_angle(a, w, h, self.opt.a_method)
        #         ct = np.array(xy, dtype=np.float32)
        #         ct_int = ct.astype(np.int32)
        #         radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        #         radius = max(0, int(radius))
        #         radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        #         # radius = np.array((h / 3., w / 3.), np.int32)
        #         draw_gaussian(hm[cls_id], ct_int, radius)
        #         wh[k] = 1. * w, 1. * h
        #         gt_a = a / 90.
        #         if self.opt.a_method == 2:
        #             gt_a = (a + 90.) / 180.
        #         angle[k] = gt_a
        #         ind[k] = ct_int[1] * output_w + ct_int[0]
        #         if 'reg' in ret:
        #             ret['reg'][k] = ct - ct_int
        #         reg_mask[k] = 1
        #         if 'cat_spec_wh' in ret:
        #             ret['cat_spec_wh'][k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        #         if 'cat_spec_mask' in ret:
        #             ret['cat_spec_mask'][k, cls_id * 2: cls_id * 2 + 2] = 1
        #         if self.opt.dense_wh:
        #             draw_dense_reg(ret['dense_wh'], hm.max(axis=0), ct_int,
        #                            wh[k], radius)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        if self.opt.a_method == 2:
            angle = np.full((self.max_objs, 1), 0.5, dtype=np.float32)
        else:
            angle = np.zeros((self.max_objs, 1), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        anns = re_anns(anns, trans_output, output_w, output_h)
        num_objs = min(len(anns), self.max_objs)

        # if True:
        if self.opt.debug:
            gt_img = cv2.warpAffine(img,
                                    trans_output, (output_w, output_h),
                                    flags=cv2.INTER_LINEAR)
            segs = [ann['segmentation'][0] for ann in anns]
            cvtools.imwrite(
                cvtools.draw_boxes_texts(gt_img,
                                         segs,
                                         draw_start=False,
                                         box_format='polygon'),
                osp.join(self.opt.debug_dir, 'gt_' + file_name))

        bad_num = 0
        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            cls_id = int(self.cat_ids[ann['category_id']])
            segm = np.array(ann['segmentation'][0])
            # if flipped:
            #     for i in range(0, len(segm), 2):
            #         segm[i] = width - segm[i] - 1
            # for i in range(0, len(segm), 2):
            #     segm[i:i + 2] = affine_transform(segm[i:i + 2], trans_output)
            #     segm[i] = np.clip(segm[i], 0, output_w - 1)
            #     segm[i + 1] = np.clip(segm[i + 1], 0, output_h - 1)

            segm_hull = cv2.convexHull(segm.reshape(-1, 2).astype(np.float32),
                                       clockwise=False)
            xy, (w, h), a = cv2.minAreaRect(segm_hull)
            if xy[0] > output_w or xy[0] < 0 or xy[1] > output_h or xy[1] < 0:
                # TODO: figure out why this happens (P0750):
                # the y in xy can be negative or larger than 127
                # print(file_name, ann, segm, xy)
                bad_num += 1
                continue
            if h > 0 and w > 0:
                a, w, h = convert_angle(a, w, h, self.opt.a_method)
                ct = np.array(xy, dtype=np.float32)
                ct_int = ct.astype(np.int32)
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                # radius = np.array((h / 3., w / 3.), np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                gt_a = a / 90.
                if self.opt.a_method == 2:
                    gt_a = (a + 90.) / 180.
                angle[k] = gt_a
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append(segm + [cls_id])
            else:
                bad_num += 1

        if bad_num == num_objs: return []
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'a': angle
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or 'train' not in self.split:
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        rets.append(ret)
        return rets
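The DOTA example above normalizes the `cv2.minAreaRect` angle with `gt_a = a / 90.`, or `(a + 90.) / 180.` when `opt.a_method == 2`. A small sketch of this encoding and the corresponding decode, assuming decoding is simply the inverse mapping (the decode code itself is not part of this listing):

def encode_angle(a, a_method):
    # a_method == 2 shifts and scales a (in degrees) from [-90, 90] into [0, 1];
    # otherwise a is simply divided by 90
    return (a + 90.) / 180. if a_method == 2 else a / 90.

def decode_angle(gt_a, a_method):
    # inverse of encode_angle
    return gt_a * 180. - 90. if a_method == 2 else gt_a * 90.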
Example #11
    def __getitem__(self, index):
        img_id = self.images[index]
        img = cv2.imread(img_id)
        height, width = img.shape[0], img.shape[1]
        # convert YOLO annotations (cls, cx, cy, w, h normalized) to absolute corners
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            anns = np.loadtxt(self.anno[index]).reshape(-1, 5)
        if anns.size:
            x1 = width * (anns[:, 1] - anns[:, 3] / 2)
            y1 = height * (anns[:, 2] - anns[:, 4] / 2)
            x2 = width * (anns[:, 1] + anns[:, 3] / 2)
            y2 = height * (anns[:, 2] + anns[:, 4] / 2)
            anns[:, 1] = x1
            anns[:, 2] = y1
            anns[:, 3] = x2
            anns[:, 4] = y2
        num_objs = min(len(anns), self.max_objs)

        # data transforms
        c = np.array([width / 2., height / 2.], dtype=np.float32)
        s = max(height, width) * 1.0
        rotation = 0
        shear = 0
        input_h, input_w = self.opt.input_h, self.opt.input_w

        hflipped = False
        vflipped = False
        if self.split == 'train':
            if self.shear:
                shear = np.clip(np.random.randn() * self.shear, -self.shear,
                                self.shear)
            if shear:
                if shear < 0:
                    img = img[:, ::-1, :]
                    anns[:, [1, 3]] = width - anns[:, [3, 1]] - 1

                M = np.array([[1, abs(shear), 0], [0, 1, 0]])

                nW = width + abs(shear * height)

                anns[:, [1, 3]] += ((anns[:, [2, 4]]) * abs(shear)).astype(int)

                img = cv2.warpAffine(img, M, (int(nW), height))

                if shear < 0:
                    img = img[:, ::-1, :]
                    anns[:, [1, 3]] = nW - anns[:, [3, 1]] - 1
                c[0] = nW / 2.
                s = max(nW, s)
                width = nW

            sf = self.scale
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if self.hflip and np.random.random() < self.hflip:
                hflipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1
            if self.vflip and np.random.random() < self.vflip:
                vflipped = True
                img = img[::-1, :, :]
                c[1] = height - c[1] - 1
            # set the random rotation parameter
            if self.rotation:
                rotation = np.clip(np.random.randn() * self.rotation,
                                   -self.rotation, self.rotation)

        trans_input = get_affine_transform(c, s, rotation, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, rotation,
                                            [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        obj = np.zeros((output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        target = np.zeros((self.max_objs, 5), dtype=np.float32)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        if self.opt.task in ['fcos']:  #, 'ttf']:  # using original target
            trans_output = trans_input
            output_w, output_h = input_w, input_h
        for k in range(num_objs):
            bbox = anns[k, 1:]
            cls_id = int(anns[k, 0])
            if hflipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            if vflipped:
                bbox[[1, 3]] = height - bbox[[3, 1]] - 1
            lt = affine_transform(bbox[:2], trans_output)
            rb = affine_transform(bbox[2:], trans_output)
            rt = affine_transform(bbox[[2, 1]], trans_output)
            lb = affine_transform(bbox[[0, 3]], trans_output)
            bbox[:2] = np.min([lt, rb, rt, lb], axis=0)
            bbox[2:] = np.max([lt, rb, rt, lb], axis=0)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if self.opt.task in ['fcos']:  #, 'ttf']:
                target[k] = cls_id, bbox[0], bbox[1], bbox[2], bbox[3]
                if h > 0 and w > 0:
                    reg_mask[k] = 1
                continue
            if h > 0 and w > 0:
                obj[int(bbox[1]):int(bbox[3]) + 1,
                    int(bbox[0]):int(bbox[2]) + 1] = 1
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = 2 * radius / 3 if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                # reg_mask[k] = 2 - w * h / output_w / output_h
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        if self.opt.task in ['fcos']:  #, 'ttf']:
            ret = {'input': inp, 'target': target, 'mask': reg_mask}
            return ret
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.reg_obj:
            ret.update({'obj': obj[np.newaxis]})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
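Example #11 above converts the normalized YOLO rows `[cls, cx, cy, w, h]` into absolute corner coordinates in place. The same conversion as a small standalone helper (the name `yolo_to_xyxy` is ours, for illustration only); reading from the input array and writing into a copy avoids any dependence on the order of the four assignments:

import numpy as np

def yolo_to_xyxy(anns, width, height):
    # anns: (N, 5) array of [cls, cx, cy, w, h] with coordinates normalized to [0, 1]
    out = anns.astype(np.float32)  # astype returns a copy; the cls column is carried over
    if out.size:
        out[:, 1] = width * (anns[:, 1] - anns[:, 3] / 2)   # x1
        out[:, 2] = height * (anns[:, 2] - anns[:, 4] / 2)  # y1
        out[:, 3] = width * (anns[:, 1] + anns[:, 3] / 2)   # x2
        out[:, 4] = height * (anns[:, 2] + anns[:, 4] / 2)  # y2
    return out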
Example #12
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        img_show = copy.deepcopy(img)

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        # flipped = False
        # if self.split == 'train':
        #     if not self.opt.not_rand_crop:
        #         s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        #         w_border = self._get_border(128, img.shape[1])
        #         h_border = self._get_border(128, img.shape[0])
        #         c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        #         c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        #     else:
        #         sf = self.opt.scale
        #         cf = self.opt.shift
        #         c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        #         c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        #         s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        #     if np.random.random() < self.opt.aug_rot:
        #         rf = self.opt.rotate
        #         rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        ################## plot input
        # cv2.imwrite('/Workspace/CenterNet/in_{}'.format(file_name), inp)

        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        # ----------------------------------------- inp finished
        output_res = self.opt.output_res
        self.num_joints = self.opt.num_joints
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        ################# plot gt
        # inp_out = cv2.warpAffine(img_show, trans_output,
        #                          (output_res, output_res),
        #                          flags=cv2.INTER_LINEAR)
        # for k in range(num_objs):
        #     ann = anns[k]
        #     bbox_show = copy.deepcopy(ann['bbox'])
        #     bbox_show[:2] = affine_transform(bbox_show[:2], trans_output)
        #     bbox_show[2:4] = affine_transform(bbox_show[2:4], trans_output)
        #     bbox_show[4:6] = affine_transform(bbox_show[4:6], trans_output)
        #     bbox_show[6:8] = affine_transform(bbox_show[6:8], trans_output)
        #
        #     bbox_show = np.clip(bbox_show, 0, output_res - 1)
        #     ct = self._calculate_intersection_point(bbox_show)
        #     ct_int = ct.astype(np.int32)
        # countour = cv2.boxPoints(((bbox[0], bbox[1]), (bbox[2], bbox[3]), bbox[4] / math.pi * 180))
        # cv2.drawContours(inp_out, [np.array(bbox_show).reshape(4,2).astype(int)], 0, (0, 0, 255), 2)
        # cv2.circle(inp_out, tuple(ct_int), 2, (0, 0, 255), -1)
        # print('file {} num  {}'.format(file_name, num_objs))
        # cv2.imwrite('/Workspace/CenterNet/out_{}'.format(file_name), inp_out)

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['segmentation'],
                           np.float32).reshape(num_joints, 2)

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:4] = affine_transform(bbox[2:4], trans_output)
            bbox[4:6] = affine_transform(bbox[4:6], trans_output)
            bbox[6:8] = affine_transform(bbox[6:8], trans_output)

            bbox = np.clip(bbox, 0, output_res - 1)

            cx, cy, w, h, theta = self.polygonToRotRectangle(bbox)
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                ct = np.array([cx, cy]).astype(np.int32)
                # ct = self._calculate_intersection_point(bbox)
                ct_int = ct.astype(np.int32)

                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                    if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    # if pts[j, 2] > 0:
                    if True:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                                        pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                if not self.opt.ellipse:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                else:
                    draw_ellipse_gaussian(hm[cls_id], ct_int, w, h, theta)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0

        hm = np.where(hm > 1e-2, hm, 0)
        ########### plot hm
        # for i in range(hm.shape[0]):
        #   idx = np.where(hm[i]>=0.05)
        #   inp_out[idx] = 0
        #   cv2.imwrite('/Workspace/CenterNet/hm_{}_{}'.format(i, file_name), hm[i] * 255)
        # cv2.imwrite('/Workspace/CenterNet/out_{}'.format(file_name), inp_out)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
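
The ind/reg pair built above encodes each object's center on the flattened output grid (ind = y * output_res + x, reg = the sub-pixel remainder). As an illustration only, not part of the dataset class, the sketch below shows how such targets can be mapped back to feature-map coordinates:

import numpy as np

def decode_centers(ind, reg, output_res):
    """Recover float (x, y) centers from flattened indices plus sub-pixel offsets."""
    ys, xs = np.divmod(ind, output_res)        # inverse of ind = y * output_res + x
    xs = xs.astype(np.float32) + reg[:, 0]     # add back the fractional x part
    ys = ys.astype(np.float32) + reg[:, 1]     # add back the fractional y part
    return np.stack([xs, ys], axis=1)
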
Example #13
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        label_sel = np.array([1.], dtype=np.float32)
        name_in = int(file_name[:6])
        if name_in > 14961 and name_in < 22480:
            label_sel[0] = 0.
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0
        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            # if np.random.random() < self.opt.aug_rot:
            #     rf = self.opt.rotate
            #     rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
            #
            # if np.random.random() < self.opt.flip:
            #     flipped = True
            #     img = img[:, ::-1, :]
            #     c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_w, self.opt.input_h])

        inp = cv2.warpAffine(
            img,
            trans_input,
            (self.opt.input_w, self.opt.input_h),
            #(self.opt.input_res, self.opt.input_res),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        num_joints = self.num_joints
        trans_output = get_affine_transform(
            c, s, 0, [self.opt.output_w, self.opt.output_h])
        trans_output_inv = get_affine_transform(
            c, s, 0, [self.opt.output_w, self.opt.output_h], inv=1)
        hm = np.zeros((self.num_classes, self.opt.output_h, self.opt.output_w),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, self.opt.output_h, self.opt.output_w),
                         dtype=np.float32)
        dense_kps = np.zeros(
            (num_joints, 2, self.opt.output_h, self.opt.output_w),
            dtype=np.float32)
        dense_kps_mask = np.zeros(
            (num_joints, self.opt.output_h, self.opt.output_w),
            dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dim = np.zeros((self.max_objs, 3), dtype=np.float32)
        location = np.zeros((self.max_objs, 3), dtype=np.float32)
        dep = np.zeros((self.max_objs, 1), dtype=np.float32)
        ori = np.zeros((self.max_objs, 1), dtype=np.float32)
        rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
        rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
        rot_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        kps_cent = np.zeros((self.max_objs, 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        inv_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        coor_kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                                 dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        rot_scalar = np.zeros((self.max_objs, 1), dtype=np.float32)
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian
        calib = np.array(anns[0]['calib'], dtype=np.float32)
        calib = np.reshape(calib, (3, 4))

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'][:27],
                           np.float32).reshape(num_joints, 3)
            alpha1 = ann['alpha']
            orien = ann['rotation_y']
            loc = ann['location']
            if flipped:
                alpha1 = np.sign(alpha1) * np.pi - alpha1
                orien = np.sign(orien) * np.pi - orien
                loc[0] = -loc[0]
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                alpha = self._convert_alpha(alpha1)
                if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
                    rotbin[k, 0] = 1
                    rotres[k, 0] = alpha - (-0.5 * np.pi)
                if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
                    rotbin[k, 1] = 1
                    rotres[k, 1] = alpha - (0.5 * np.pi)
                rot_scalar[k] = alpha
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
                reg[k] = ct - ct_int
                dim[k] = ann['dim']
                # dim[k][0]=math.log(dim[k][0]/1.63)
                # dim[k][1] = math.log(dim[k][1]/1.53)
                # dim[k][2] = math.log(dim[k][2]/3.88)
                dep[k] = loc[2]
                ori[k] = orien
                location[k] = loc
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0
                rot_mask[k] = 1
                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                    if self.opt.mse_loss else max(0, int(hp_radius))
                kps_cent[k, :] = pts[8, :2]
                for j in range(num_joints):
                    pts[j, :2] = affine_transform(pts[j, :2], trans_output)
                    kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                    kps_mask[k, j * 2:j * 2 + 2] = 1
                    if pts[j, 2] > 0:
                        #pts[j, :2] = affine_transform(pts[j, :2], trans_output)
                        if pts[j, 0] >= 0 and pts[j, 0] < self.opt.output_w and \
                                pts[j, 1] >= 0 and pts[j, 1] < self.opt.output_h:
                            #kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int
                            #kps_mask[k, j * 2: j * 2 + 2] = 1
                            inv_mask[k, j * 2:j * 2 + 2] = 1
                            coor_kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[
                                k * num_joints +
                                j] = pt_int[1] * self.opt.output_w + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                if coor_kps_mask[k, 16] == 0 or coor_kps_mask[k, 17] == 0:
                    coor_kps_mask[k, :] = coor_kps_mask[k, :] * 0
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        meta = {'file_name': file_name}
        if flipped:
            coor_kps_mask = coor_kps_mask * 0
            inv_mask = inv_mask * 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask,
            'dim': dim,
            'rotbin': rotbin,
            'rotres': rotres,
            'rot_mask': rot_mask,
            'dep': dep,
            'rotscalar': rot_scalar,
            'kps_cent': kps_cent,
            'calib': calib,
            'opinv': trans_output_inv,
            'meta': meta,
            "label_sel": label_sel,
            'location': location,
            'ori': ori,
            'coor_kps_mask': coor_kps_mask,
            'inv_mask': inv_mask
        }
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
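
Example #13 encodes the observation angle with two overlapping bins centered at -pi/2 and +pi/2: rotbin flags which bins are active and rotres stores the residual to each bin center. A rough decoding sketch under that assumption (the bin-selection rule used at inference time and the body of _convert_alpha are not shown in the loader, so treat this purely as an illustration):

import numpy as np

def decode_alpha(rotbin, rotres):
    """Invert the two-bin angle encoding: alpha = bin_center + residual."""
    bin_centers = np.array([-0.5 * np.pi, 0.5 * np.pi], dtype=np.float32)
    # pick bin 1 when only it is flagged, otherwise fall back to bin 0
    chosen = np.where(rotbin[:, 1] > rotbin[:, 0], 1, 0)
    return bin_centers[chosen] + rotres[np.arange(len(rotres)), chosen]
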
Example #14
    def __getitem__(self, index):
        img_path = self.img_list[index]
        anns = self.anno_list[index]
        num_objs = min(len(anns), self.max_objs)
        # print('anns:\n',anns)
        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.input_h, self.input_w

        flipped = False

        if not self.not_rand_crop:
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
        else:
            sf = self.scale
            cf = self.shift
            c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

        if np.random.random() < self.flip:
            flipped = True
            img = img[:, ::-1, :]
            c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        if self.show:
            input_img = inp.copy()

        inp = (inp.astype(np.float32) / 255.)

        if not self.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.down_ratio
        output_w = input_w // self.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.mse_loss else draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            label, bbox = ann
            bbox = np.array(bbox)
            cls_id = int(self.dict2num[label] - 1)

            # print('bbox,cls_id : ',(bbox),(cls_id))
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            if self.show:
                cv2.putText(input_img, label, (int(bbox[0] * self.down_ratio),
                                               int(bbox[1] * self.down_ratio)),
                            cv2.FONT_HERSHEY_COMPLEX, 1,
                            self.voc_color[cls_id], 1)
                cv2.rectangle(input_img, (int(bbox[0] * self.down_ratio),
                                          int(bbox[1] * self.down_ratio)),
                              (int(bbox[2] * self.down_ratio),
                               int(bbox[3] * self.down_ratio)),
                              self.voc_color[cls_id], 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[
                    0]  # ind[k]: 0~128*128-1, object index in 128*128
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        if self.show:
            cv2.namedWindow("image", 0)
            cv2.imshow("image", input_img)
            cv2.namedWindow("heatmap", 0)
            cv2.imshow("heatmap", np.hstack(hm))
            cv2.waitKey(500)

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.reg_offset:
            ret.update({'reg': reg})
        if self.debug > 0 or self.state != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
              np.zeros((1, 6), dtype=np.float32)
            # img_id is not defined in this loader; store the dataset index instead
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': index}
            ret['meta'] = meta
        return ret
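
Every loader here splats the ground-truth centers onto the heatmap with draw_gaussian (i.e. draw_umich_gaussian unless mse_loss is set). A minimal sketch of what such a helper typically does, assuming the usual CenterNet-style convention of merging overlapping objects with an element-wise maximum; the repository's actual implementation may differ in details such as the sigma choice:

import numpy as np

def draw_center_gaussian(heatmap, center, radius):
    """Splat an unnormalized 2D Gaussian at an integer center, merging via element-wise max."""
    diameter = 2 * radius + 1
    sigma = diameter / 6.0
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

    cx, cy = int(center[0]), int(center[1])
    h, w = heatmap.shape
    left, right = min(cx, radius), min(w - cx, radius + 1)
    top, bottom = min(cy, radius), min(h - cy, radius + 1)

    patch = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    np.maximum(patch, gaussian[radius - top:radius + bottom,
                               radius - left:radius + right], out=patch)
    return heatmap
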
Example #15
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']  # read the image file name
        img_path = os.path.join(self.img_dir, file_name)  # full path to the image file
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)  # load the GT boxes for this image

        num_objs = min(len(anns), self.max_objs)
        # read the image and preprocess it
        # print(img_id, img_path)
        img = cv2.imread(img_path)
        # import pdb
        # pdb.set_trace()
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train' or self.split == 'debug1':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.7, 1.3, 0.1))
                w_border = self._get_border(512, img.shape[1])
                h_border = self._get_border(512, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
                # c[0] = np.random.randint(low=0.4*img.shape[1], high=0.6*img.shape[1] )
                # c[1] = np.random.randint(low=0.4*img.shape[0], high=0.6*img.shape[0])
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # build the affine transform from the jittered c and s; the boxes are later moved with the same transform
        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        # scale pixel values from 0-255 to 0-1
        if DEBUG:
            raw_img = inp.copy()
        inp = (inp.astype(np.float32) / 255.)
        # color jitter
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        # subtract the mean and divide by the std
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # image preprocessing done

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)  # heatmap

        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)  # dense wh
        angle = np.zeros((self.max_objs, 1), dtype=np.float32)
        dense_angle = np.zeros((1, output_h, output_w), dtype=np.float32)  # dense angle
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)  # sub-pixel offset
        ind = np.zeros((self.max_objs), dtype=np.int64)  # index of the object on the feature map, derived from its coordinates
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)  # mask is 0 for objects that disappear after the transform
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)  # class-specific width/height
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)  # class-specific width/height mask
        cat_spec_angle = np.zeros((self.max_objs, num_classes), dtype=np.float32)  # class-specific angle
        cat_spec_angle_mask = np.zeros((self.max_objs, num_classes), dtype=np.uint8)  # class-specific angle mask

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        # loop over all objects
        for k in range(num_objs):
            ann = anns[k]
            # bbox = self._coco_box_to_bbox(ann['rbbox'])
            bbox = ann['rbbox']
            cls_id = int(self.cat_ids[ann['category_id']])

            # apply the same transform to the box that was applied to the image
            if flipped:
                # mirror cx
                bbox[0] = width - bbox[0] - 1

            # get the four corner points
            pt1, pt2, pt3, pt4 = self._get_four_points((bbox[0], bbox[1]), bbox[-1], bbox[2], bbox[3])
            pt1 = affine_transform((pt1[0, 0], pt1[0, 1]), trans_output)
            pt2 = affine_transform((pt2[0, 0], pt2[0, 1]), trans_output)
            pt3 = affine_transform((pt3[0, 0], pt3[0, 1]), trans_output)
            pt4 = affine_transform((pt4[0, 0], pt4[0, 1]), trans_output)

            # compute the center, width/height and angle
            ct = np.array(
                [(pt1[0] + pt3[0]) / 2, (pt1[1] + pt3[1]) / 2], dtype=np.float32)
            w = np.linalg.norm(pt1 - pt2)
            h = np.linalg.norm(pt1 - pt4)
            # compute the new angle
            # vec_base = np.array([0, 1], dtype=np.float32)
            # vec_angle = np.array([(pt1[0] + pt2[0]) / 2, (pt1[1] + pt2[1]) / 2], dtype=np.float32) - ct
            # norm_base = np.linalg.norm(vec_base)
            # norm_angle = np.linalg.norm(vec_angle)
            # cos_angle = vec_base.dot(vec_angle) / (norm_base * norm_angle + np.finfo(float).eps)
            # a = np.arccos(cos_angle)

            if self.opt.dataset == 'hrsc':
                a = bbox[-1]
                if flipped:
                    a = np.pi - a
            elif self.opt.dataset == 'dota':
                a = bbox[-1]
                # ####### in the dota json the angle is in [0, 2*pi] ##########
                if flipped:
                    a = 2 * np.pi - a
            elif self.opt.dataset == 'rosku':
                # ####### in the rosku json the angle is in [-0.5*pi, 0.5*pi] ##########
                a = bbox[-1] / math.pi
                if flipped:
                    a = -1 * a
                a = np.clip(a, -0.5, 0.5)
                a = a + 0.5
            else:
                raise Exception('Wrong dataset.')

            if DEBUG:
                color = [255, 0, 0]
                line_width = 2
                # ####### in the rosku json the angle is in [-0.5*pi, 0.5*pi] ##########
                # temp_a = (a - 0.5) * math.pi
                temp_a = a
                npt1, npt2, npt3, npt4 = self._get_four_points((ct[0], ct[1]), temp_a, w, h)
                npt1 = self._float_to_int(npt1)
                npt2 = self._float_to_int(npt2)
                npt3 = self._float_to_int(npt3)
                npt4 = self._float_to_int(npt4)
                cv2.line(raw_img, npt1, npt2, color, line_width)
                cv2.line(raw_img, npt2, npt3, color, line_width)
                cv2.line(raw_img, npt3, npt4, color, line_width)
                cv2.line(raw_img, npt4, npt1, color, line_width)


            if 0 <= ct[0] <= output_w - 1 and 0 <= ct[1] <= output_h - 1:
                # heatmap: spread the GT center with a Gaussian
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct_int = ct.astype(np.int32)
                # draw the GT at the center point
                draw_gaussian(hm[cls_id], ct_int, radius)

                wh[k] = 1. * w, 1. * h
                angle[k] = 1. * a
                ind[k] = ct_int[1] * output_w + ct_int[0]  # index of the object on the feature map
                reg[k] = ct - ct_int  # offset between the real-valued ct and its integer version
                reg_mask[k] = 1
                # wh targets
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

                # angle targets
                cat_spec_angle[k, cls_id] = angle[k]
                cat_spec_angle_mask[k, cls_id] = 1
                if self.opt.dense_angle or self.opt.fsm:
                    draw_dense_reg(dense_angle, hm.max(axis=0), ct_int, angle[k], radius)
                # ang_radius = max(int(1.0), int(radius/2.))
                # draw_dense_reg_uni(dense_angle[0, :], ct_int, angle[k], ang_radius)
                gt_det.append([ct[0], ct[1], w, h, angle[k], 1, cls_id])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'angle': angle}

        # wh
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']

        # angle
        if self.opt.dense_angle or self.opt.fsm:
            dense_angle_mask = hm.max(axis=0, keepdims=True)
            ret.update({'dense_angle': dense_angle, 'dense_angle_mask': dense_angle_mask})
            if self.opt.dense_angle:
                del ret['angle']
        elif self.opt.cat_spec_angle:
            ret.update({'cat_spec_angle': cat_spec_angle, 'cat_spec_angle_mask': cat_spec_angle_mask})
            del ret['angle']

        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 7), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id, 'img_name':file_name}
            ret['meta'] = meta

        if DEBUG:
            ret['raw_img'] = raw_img
            ret['gt_det'] = gt_det
            ret['img_id'] = img_id
            cv2.imwrite(os.path.join('./cache', '%s.jpg' % img_id), raw_img)
        return ret
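
Example #15 depends on a _get_four_points(center, angle, w, h) helper whose body is not included here. A hedged sketch of how such corners can be computed with a plain rotation matrix (this version returns an array of (x, y) rows rather than the 1x2 matrices the loader above indexes with pt1[0, 0]):

import numpy as np

def rotated_box_corners(center, angle, w, h):
    """Corners of a (w, h) box centered at `center` and rotated by `angle` radians."""
    cx, cy = center
    corners = np.array([[-w / 2, -h / 2],
                        [ w / 2, -h / 2],
                        [ w / 2,  h / 2],
                        [-w / 2,  h / 2]], dtype=np.float32)
    rot = np.array([[np.cos(angle), -np.sin(angle)],
                    [np.sin(angle),  np.cos(angle)]], dtype=np.float32)
    return corners @ rot.T + np.array([cx, cy], dtype=np.float32)
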
Example #16
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        img_show = copy.deepcopy(img)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        # flipped = False         # remove flip process
        ############## remove image preprocess
        # if self.split == 'train':
        #     if not self.opt.not_rand_crop:
        #         s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        #         w_border = self._get_border(128, img.shape[1])
        #         h_border = self._get_border(128, img.shape[0])
        #         c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        #         c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
        #     else:
        #         sf = self.opt.scale
        #         cf = self.opt.shift
        #         c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        #         c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
        #         s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        #
        #     if np.random.random() < self.opt.flip:
        #         flipped = True
        #         img = img[:, ::-1, :]
        #         c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        ################## plot
        # cv2.imwrite('/Workspace/CenterNet/in_{}'.format(file_name), inp)

        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
        ################# plot
        # inp_out = cv2.warpAffine(img_show, trans_output,
        #                          (output_w, output_h),
        #                          flags=cv2.INTER_LINEAR)
        # for k in range(num_objs):
        #     ann = anns[k]
        #     bbox_show = copy.deepcopy(ann['bbox'])
        #     bbox_show[:2] = affine_transform(bbox_show[:2], trans_output)
        #     cv2.circle(inp_out, tuple(list(map(int, bbox_show[:2]))), 2, (0, 0, 255), -1)
        # print('file {} num  {}'.format(file_name, num_objs))
        # cv2.imwrite('/Workspace/CenterNet/out_{}'.format(file_name), inp_out)

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

        ### add angle regression
        reg_angle = np.zeros((self.max_objs, 1), dtype=np.float32)

        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        ########### show gt
        # for k in range(num_objs):
        #   ann = anns[k]
        #   bbox = ann['bbox']
        #   bbox[:2] = affine_transform(bbox[:2], trans_output)
        #   bbox[2:4] = affine_transform(bbox[2:4], trans_output)
        #   bbox[0] = np.clip(bbox[0], 0, output_w - 1)
        #   bbox[1] = np.clip(bbox[1], 0, output_h - 1)
        #   self.getfourpoints(bbox, inp_out)
        # cv2.imwrite('/Workspace/CenterNet/gt_{}'.format(file_name), inp_out)

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            # bbox = self._coco_box_to_bbox(ann['bbox'])
            bbox = ann['bbox']
            cls_id = int(self.cat_ids[ann['category_id']])

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:4] = affine_transform(bbox[2:4], trans_output)
            bbox[0] = np.clip(bbox[0], 0, output_w - 1)
            bbox[1] = np.clip(bbox[1], 0, output_h - 1)
            h, w = bbox[3], bbox[2]
            if h > 0 and w > 0:

                ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                reg_angle[k] = bbox[4]
                if not self.opt.ellipse:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                    radius = max(0, int(radius))
                    radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                    draw_gaussian(hm[cls_id], ct_int, radius)
                else:
                    draw_ellipse_gaussian(hm[cls_id], ct_int, w, h,
                                          reg_angle[k])
                wh[k] = 1. * w, 1. * h

                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if self.opt.ellipse:
            hm = np.where(hm > 1e-2, hm, 0)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'angle': reg_angle
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
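
Example #16 (and the ellipse branch at the start of this section) calls a draw_ellipse_gaussian(hm, ct, w, h, theta) that is not shown. One plausible reading is an anisotropic Gaussian whose axes follow the box size and whose orientation follows the regressed angle; the sketch below is a guess along those lines, and the sigma scaling and boundary handling are assumptions rather than the repository's code:

import numpy as np

def splat_rotated_gaussian(heatmap, center, w, h, theta, sigma_scale=6.0):
    """Splat a rotated anisotropic Gaussian (sigmas proportional to w, h) onto the heatmap."""
    H, W = heatmap.shape
    cx, cy = float(center[0]), float(center[1])
    sx, sy = max(w, 1.0) / sigma_scale, max(h, 1.0) / sigma_scale

    ys, xs = np.mgrid[0:H, 0:W].astype(np.float32)
    dx, dy = xs - cx, ys - cy
    # rotate the grid into the box frame, then evaluate an axis-aligned Gaussian there
    u = dx * np.cos(theta) + dy * np.sin(theta)
    v = -dx * np.sin(theta) + dy * np.cos(theta)
    gaussian = np.exp(-(u * u) / (2 * sx * sx) - (v * v) / (2 * sy * sy))
    np.maximum(heatmap, gaussian, out=heatmap)
    return heatmap
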
Example #17
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)

        anns = list(
            filter(
                lambda x: x['category_id'] in self._valid_ids and x['iscrowd']
                != 1, anns))
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if self.cfg.DATASET.RANDOM_CROP:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.cfg.DATASET.SCALE
                cf = self.cfg.DATASET.SHIFT
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.cfg.DATASET.AUG_ROT:
                rf = self.cfg.DATASET.ROTATE
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.cfg.DATASET.FLIP:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES])
        inp = cv2.warpAffine(
            img,
            trans_input, (self.cfg.MODEL.INPUT_RES, self.cfg.MODEL.INPUT_RES),
            flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.cfg.DATASET.NO_COLOR_AUG:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - np.array(self.cfg.DATASET.MEAN).astype(
            np.float32)) / np.array(self.cfg.DATASET.STD).astype(np.float32)
        inp = inp.transpose(2, 0, 1)

        output_res = self.cfg.MODEL.OUTPUT_RES
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
        trans_seg_output = get_affine_transform(c, s, 0,
                                                [output_res, output_res])
        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        seg = np.zeros((self.max_objs, output_res, output_res),
                       dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.cfg.LOSS.MSE_LOSS else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            segment = self.coco.annToMask(ann)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
                segment = segment[:, ::-1]

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            segment = cv2.warpAffine(segment,
                                     trans_seg_output,
                                     (output_res, output_res),
                                     flags=cv2.INTER_LINEAR)
            segment = segment.astype(np.float32)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.cfg.hm_gauss if self.cfg.LOSS.MSE_LOSS else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                #mask
                pad_rate = 0.3
                segment_mask = np.ones_like(segment)
                x, y = (np.clip([ct[0] - (1 + pad_rate) * w / 2, ct[0] + (1 + pad_rate) * w / 2], 0, output_res - 1) * 2).astype(int), \
                       (np.clip([ct[1] - (1 + pad_rate) * h / 2, ct[1] + (1 + pad_rate) * h / 2], 0, output_res - 1) * 2).astype(int)
                segment_mask[y[0]:y[1], x[0]:x[1]] = 0
                segment[segment_mask == 1] = 255
                seg[k] = segment

                #keypoint
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.cfg.hm_gauss \
                            if self.cfg.LOSS.MSE_LOSS else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.cfg.LOSS.DENSE_HP:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask,
            'seg': seg
        }
        if self.cfg.LOSS.DENSE_HP:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.cfg.LOSS.REG_OFFSET:
            ret.update({'reg': reg})
        if self.cfg.LOSS.HM_HP:
            ret.update({'hm_hp': hm_hp})
        if self.cfg.LOSS.REG_HP_OFFSET:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.cfg.DEBUG > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
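
Throughout these loaders, boxes and keypoints are mapped into output space with affine_transform(pt, trans_output), where the transform comes from get_affine_transform(c, s, rot, [out_w, out_h]) and is a 2x3 matrix in the cv2.warpAffine convention. The point transform itself is just a homogeneous multiply; a small sketch for reference (the construction of the matrix from center, scale and rotation is omitted here):

import numpy as np

def apply_affine(pt, t):
    """Apply a 2x3 affine matrix t (cv2.warpAffine convention) to a 2D point."""
    homogeneous = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
    return (t @ homogeneous)[:2]
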
Example #18
    def __getitem__(self, index):
        index = 45236  # NOTE: hard-coded sample index, apparently a debugging leftover
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        assert os.path.exists(img_path), 'Image path does not exist: {}'.format(img_path)

        # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        target = self.coco.loadAnns(ids=ann_ids)

        # Separate out crowd annotations. These are annotations that signify a large crowd of
        # objects of said class, where there is no annotation for each individual object.
        target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]

        if len(target) > 0:
            # Pool all the masks for this image into one [num_objects,height,width] matrix
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)
            # without this transpose, an error occurs later in the augmentation step (original line 100)
            masks = masks.transpose(1, 2, 0)

        # labels = [int(self.cat_ids[obj['category_id']]) for obj in target]

        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                masks = masks[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        if self.rgb:
            inp = inp[..., ::-1]
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        d1 = masks.shape[2]
        masks = cv2.warpAffine(masks, trans_input,
                               (input_w, input_h),
                               flags=cv2.INTER_LINEAR)
        masks = np.expand_dims(masks, 2) if masks.ndim != 3 else masks
        d2 = masks.shape[2]
        assert d1 == d2
        masks = masks.transpose(2, 0, 1)
        masks = (masks >= 0.5).astype(np.uint8)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        # centers = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        segm_masks = []
        gt_det = []
        num_objs = min(len(target), self.max_objs)
        for k in range(num_objs):
            ann = target[k]

            # convert bboxes to point_form (xmin, ymin, xmax, ymax)
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # After augmentation some masks will be empty.
            if h > 0 and w > 0 and masks[k].sum() > 0.0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                # centers[k] = ct_int[0], ct_int[1]
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                det = [ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, cls_id]
                gt_det.append(det)
                segm_masks.append(masks[k])

        if len(segm_masks) > 0:
            masks = np.stack(segm_masks)
            gt_det = np.stack(gt_det)

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind,
               'wh': wh, 'masks': masks, 'gt_bbox_lbl': gt_det}

        # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
        #        'masks': masks, 'centers': centers, 'gt_bbox_lbl': gt_det}

        # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
        #        'masks': masks, 'labels': labels, 'crowd': crowd, 'centers': centers, 'gt_bbox': gt_det}

        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
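
Unlike the fixed-size targets in the other examples, Example #18 returns 'masks' and 'gt_bbox_lbl' whose first dimension varies from image to image, so PyTorch's default collate cannot stack them into a batch. One possible workaround (an assumption about how this loader might be consumed, not code from the repository) is a collate function that keeps the variable-size entries as per-image lists:

import torch
from torch.utils.data.dataloader import default_collate

VARIABLE_KEYS = ('masks', 'gt_bbox_lbl')

def collate_with_variable_targets(batch):
    """Stack fixed-size targets normally; keep variable-size targets as lists of tensors."""
    out = {}
    for key in batch[0]:
        values = [sample[key] for sample in batch]
        if key in VARIABLE_KEYS:
            out[key] = [torch.as_tensor(v) for v in values]
        else:
            out[key] = default_collate(values)
    return out

Passing collate_fn=collate_with_variable_targets to the DataLoader keeps the rest of the dict batched as usual.
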
Example #19
    def __getitem__(self, index):
        # entry point: this method produces the outputs inp, hm, reg_mask, ind and wh.
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)  # number of objects, capped at max_objs (100 here)

        img = cv2.imread(img_path)
        # next, get the longest side of the image and the input size (512, 512)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                     dtype=np.float32)  # image center
        if self.opt.keep_res:  # False
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:  # True
            s = max(img.shape[0], img.shape[1]) * 1.0  # s is the longest side
            input_h, input_w = self.opt.input_h, self.opt.input_w  # 512, 512

        # a series of augmentations follows; the result is the first required output, the input image inp.
        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s  # * np.random.choice(np.arange(0.6, 1.4, 0.1))  # random scale (disabled here)
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1  # random horizontal flip

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)  # affine warp
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        # Next, generate the ground-truth heatmaps.
        output_h = input_h // self.opt.down_ratio  # output size: 512 // 4 = 128
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w),
                      dtype=np.float32)  # heatmap(80,128,128)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # box width/height (100, 2)
        angs = np.zeros((self.max_objs, 1), dtype=np.float32)  # box angle (100, 1)
        dense_wh = np.zeros((2, output_h, output_w),
                            dtype=np.float32)  # dense wh map (2, 128, 128)
        reg = np.zeros((self.max_objs, 2),
                       dtype=np.float32)  # sub-pixel offset from downsampling (100, 2) floats
        ind = np.zeros((self.max_objs), dtype=np.int64)  # flattened center index, 100 entries
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)  # regression mask, 100 entries
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)  # 100*80*2
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)  # 100*80*2
        # mse_loss is False here, so only draw_umich_gaussian is used.
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                bbox[[4]] = 180 - bbox[[4]]
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:4] = affine_transform(bbox[2:4], trans_output)

            # Key point: this clipping caused the center-point shift seen in ship detection (disabled below).

            #bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            #bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            #TODO insert
            ang = bbox[4]
            h = np.clip(h, 0, output_h - 1)
            w = np.clip(w, 0, output_w - 1)
            if h > 0 and w > 0:
                radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)))  # the key step: determining the Gaussian radius
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                if ct[0] < 0 or ct[0] > output_w - 1 or ct[1] < 0 or ct[
                        1] > output_h - 1:  #
                    continue
                # ct[0] = np.clip(ct[0], 0, output_w - 1)
                # ct[1] = np.clip(ct[1], 0, output_h - 1)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                #cv2.imwrite("/data/humaocheng/CenterNet-master/single_heatmap.jpg", hm[0]*255)
                wh[k] = 1. * w, 1. * h  # box width/height target for the size (wh) loss
                angs[k] = 1. * ang
                ind[k] = ct_int[1] * output_w + ct_int[0]  # index of the center on the 128x128 feature map
                reg[k] = ct - ct_int  # offset-loss target: sub-pixel offset of the k-th object's center
                # e.g. [98.97667, 2.3566666] - [98, 2] = [0.97667, 0.3566666]
                reg_mask[k] = 1  # marks which of the max_objs slots hold a real object:
                # 1 where an object exists, 0 elsewhere
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                # gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                #                ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
                #TODO insert
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    ang, 1, cls_id
                ])
        # cv2.imwrite("/data/humaocheng/CenterNet-master/heatmap.jpg",hm[0]*255)
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'ang': angs
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 7), dtype=np.float32)  # 7 columns: x1, y1, x2, y2, ang, score, cls_id
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
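The comment above asks how the Gaussian radius is determined. The gaussian_radius helper is not shown in these snippets; in CenterNet/CornerNet-style code it is commonly implemented roughly as in the sketch below (not necessarily byte-identical to the version imported here): each of the three corner-displacement cases yields a quadratic in the radius, and the smallest of the three resulting radii is kept so that a box shifted by that amount still overlaps the ground truth with IoU of at least min_overlap.

import numpy as np

def gaussian_radius(det_size, min_overlap=0.7):
    # Sketch of the common CenterNet-style radius computation.
    height, width = det_size

    a1 = 1
    b1 = height + width
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
    r1 = (b1 + sq1) / 2

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
    r2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)

print(gaussian_radius((10, 10)))  # roughly 2.7 for a 10x10 box
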
Example #20
    def __next__(self):
        load_vid_time, img_transform_time, create_heatmap_time = 0, 0, 0
        start = time.time()
        if self.cap is None or self.count >= self.length:
            if self.cap is not None and self.vid_i == self.num_videos and self.loop:
                self.vid_i = 0
            elif self.cap is not None and self.vid_i == self.num_videos:
                raise StopIteration
            if self.opt.vidstream == 'skvideo':
                self.cap = skvideo.io.vread(self.video_paths[self.vid_i])
                metadata = skvideo.io.ffprobe(self.video_paths[self.vid_i])
                fr_lst = metadata['video']['@avg_frame_rate'].split('/')
                self.rate = int(fr_lst[0]) / int(fr_lst[1])
                self.length = int(metadata['video']['@nb_frames'])
            else:
                self.cap = cv2.VideoCapture(self.video_paths[self.vid_i])
                width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                self.rate = self.cap.get(cv2.CAP_PROP_FPS)
                self.length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
                self.frame_gen = self._frame_from_video(self.cap)

            # self.detections = pickle.load(open(self.annotation_path[self.vid_i], 'rb'))
            self.count = 0
            self.vid_i += 1
        end_load_vid = time.time()
        load_vid_time = end_load_vid - start

        # load image depending on stream
        start_resize = time.time()
        if self.opt.vidstream == 'skvideo':
            img = self.cap[self.count]
        else:
            img = next(self.frame_gen)
            # in_h = int(original_img.shape[0] / self.opt.downsample)
            # in_w = int(original_img.shape[1] / self.opt.downsample)
            # img = cv2.resize(original_img, (in_w, in_h))
            # cv2.imwrite("/home/jl5/CenterNet/tmp.png", img)

        start_img_transform = time.time()
        anns = self.mmdetect_pred2inst(self.count)
        num_objs = min(len(anns), self.max_objs)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # send to gpu
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = torch.from_numpy(inp).cuda()
        inp = (inp.float() / 255.)

        # if self.split == 'train' and not self.opt.no_color_aug:
        #   color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - torch.from_numpy(self.mean).cuda()) / torch.from_numpy(
            self.std).cuda()
        inp = inp.permute(2, 0, 1)

        end_img_transform = time.time()
        img_transform_time = end_img_transform - start_img_transform

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)
        unconfident_hm = np.zeros((num_classes, output_h, output_w),
                                  dtype=np.float32)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []

        def show_bbox(im):
            fig, ax = plt.subplots(1)
            ax.imshow(im)
            for i in range(num_objs):
                bbox = np.array(anns[i]['bbox'], dtype=np.int32)
                bbox = bbox / self.opt.downsample
                rect = patches.Rectangle((bbox[0], bbox[1]),
                                         bbox[2] - bbox[0],
                                         bbox[3] - bbox[1],
                                         linewidth=1,
                                         edgecolor='r',
                                         facecolor='none')
                ax.add_patch(rect)
            plt.savefig('/home/jl5/CenterNet/tmp.png')
            pdb.set_trace()

        # detect = self.detections[self.count]
        # if self.opt.task == 'ctdet_semseg':
        #   seg_mask, weight_mask = batch_segmentation_masks(1, (720, 1280), np.array([detect['boxes']]), np.array([detect['classes']]), detect['masks'],
        #       np.array([detect['scores']]), [len(detect['boxes'])], True, coco_class_groups, mask_threshold=0.5, box_threshold=self.opt.center_thresh, scale_boxes=False)
        #   unbatch_seg = seg_mask[0].astype(np.uint8)
        #   unbatch_weight = weight_mask[0].astype(np.uint8)
        #   seg_mask = np.expand_dims(cv2.resize(unbatch_seg, (1280, 736)), axis=0).astype(np.int32)
        #   weight_mask = np.expand_dims(cv2.resize(unbatch_weight, (1280, 736)), axis = 0).astype(bool)

        start_detect = time.time()

        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(
                ann['bbox'],
                dtype=np.float32)  # self._coco_box_to_bbox(ann['bbox'])
            # bbox = bbox / self.opt.downsample # if need to downsample
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
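                # detections in the ambiguous score band [0.3, 0.5) only seed the
                # "unconfident" heatmap and are excluded from the regression targets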
                if ann['score'] >= 0.3 and ann['score'] < 0.5:
                    draw_gaussian(unconfident_hm[cls_id], ct_int, radius)
                    reg_mask[k] = 0
                else:
                    draw_gaussian(hm[cls_id], ct_int, radius)
                    reg_mask[k] = 1
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int

                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
        if self.opt.task == 'ctdet_semseg':  # note: seg_mask / weight_mask come from the commented-out block above
            ret = {
                'input': inp,
                'hm': hm,
                'reg_mask': reg_mask,
                'ind': ind,
                'wh': wh,
                'seg': seg_mask,
                'weight_seg': weight_mask
            }
        else:
            ret = {
                'input': inp,
                'hm': hm,
                'reg_mask': reg_mask,
                'ind': ind,
                'wh': wh,
                'unconf_hm': unconfident_hm
            }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                    np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': self.count}
            ret['meta'] = meta
        self.count += 1

        end_detect_time = time.time()
        create_heatmap_time = end_detect_time - start_detect
        # print("load vid {:.4f} | img transform {:.4f} | create instance {:.4f} \n".format(load_vid_time, img_transform_time, create_heatmap_time))
        return ret
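A minimal, self-contained sketch of the iterator pattern this __next__ follows (FrameStream is a toy stand-in, not the real class): the object is its own iterator, yields one target dict per frame, and raises StopIteration at the end of the stream unless looping is requested.

class FrameStream:
    def __init__(self, num_frames, loop=False):
        self.num_frames, self.loop, self.count = num_frames, loop, 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.count >= self.num_frames:
            if not self.loop:
                raise StopIteration
            self.count = 0            # wrap around when looping
        ret = {'img_id': self.count}  # the real class also returns input, hm, wh, ...
        self.count += 1
        return ret

for batch in FrameStream(num_frames=3):
    print(batch['img_id'])  # 0, 1, 2
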
Example #21
    def __getitem__(self, index):
        #img_id = self.images[index]
        #file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        #img_path = os.path.join(self.img_dir, file_name)
        #ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        #anns = self.coco.loadAnns(ids=ann_ids)
        #num_objs = min(len(anns), self.max_objs)
        img_id = index
        img_path = self.images[index]
        label_path = self.label_files[index]
        #print(self.img_dir)
        #print(file_name)
        img = cv2.imread(img_path)
        h, w, _ = img.shape
        labels = []
        #print(img_path)
        #print(label_path)
        #print(os.path.isfile(label_path))

        if os.path.isfile(label_path):
            # with open(label_path, 'r') as f:
            #     x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            x = self.labels[index]
            #print(x)
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2)
                labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2)
                labels[:, 3] = w * (x[:, 3])
                labels[:, 4] = h * (x[:, 4])
                #labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2)
                #labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2)
        #print('labels:{}'.format(len(labels)))

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp_ori = cv2.warpAffine(img,
                                 trans_input, (input_w, input_h),
                                 flags=cv2.INTER_LINEAR)

        inp = (inp_ori.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        #for k in range(num_objs):
        for k in range(len(labels)):
            ann = labels[k]
            #print('ann:{}'.format(ann))
            bbox = self._coco_box_to_bbox(ann[1:5])
            #print(index,bbox)
            #cv2.rectangle(img,(int(bbox[0]), int(bbox[1])),(int(bbox[2]), int(bbox[3])),(0,0,255),3)
            #print('bbox: ',bbox)
            cls_id = int(ann[0])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            #print('refined_bbox: ',bbox)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])
                #print('refined_bbox: ',[ct[0] - w / 2, ct[1] - h / 2,
                #               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
                #cv2.rectangle(inp_ori,(int(ct[0] - w / 2)*self.opt.down_ratio, int(ct[1] - h / 2)*self.opt.down_ratio),(int(ct[0] + w / 2)*self.opt.down_ratio, int(ct[1] + h / 2)*self.opt.down_ratio),(0,0,255),3)
        #cv2.imshow('img',img)
        #cv2.imshow('img_ori',inp_ori)
        #cv2.waitKey(0)
        #cv2.destroyAllWindows()

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
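A quick worked example (made-up numbers) of the label conversion used above: each YOLO-style row stores class, cx, cy, w, h normalized to [0, 1], and the snippet converts it to pixel top-left x/y plus pixel width/height, which _coco_box_to_bbox then turns into x1, y1, x2, y2.

import numpy as np

x = np.array([[0, 0.5, 0.5, 0.2, 0.4]], dtype=np.float32)  # class, cx, cy, w, h (normalized)
w_img, h_img = 640, 480

labels = x.copy()
labels[:, 1] = w_img * (x[:, 1] - x[:, 3] / 2)  # top-left x   -> 256.0
labels[:, 2] = h_img * (x[:, 2] - x[:, 4] / 2)  # top-left y   -> 144.0
labels[:, 3] = w_img * x[:, 3]                  # pixel width  -> 128.0
labels[:, 4] = h_img * x[:, 4]                  # pixel height -> 192.0
# _coco_box_to_bbox(labels[0, 1:5]) would then give [256., 144., 384., 336.]
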
Example #22
    def __getitem__(self, index):

        ######################################## Start of modified Code Block #####################################################
        curr_example = self.all_frames[index]
        img_path = curr_example[0]
        anns = curr_example[1]
        ######################################## End of modified Code Block #####################################################

        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
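        # guard against unreadable frames: fall back to the last successfully read image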
        try:
            img_shape = img.shape
            self.last_img = img
        except AttributeError:
            print("Image '{}' failed!!!".format(img_path))
            self.failed_images.add(img_path)
            img = self.last_img

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:  # this is the default!
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        # GEO: we need to calculate the mean and std in datasets/dataset/gaila.py
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = len(list(self.cat_ids.keys()))
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ######################################## Start of modified Code Block #####################################################
            ann = anns.iloc[k]
            bbox = np.asarray([
                ann["topLeftX"], ann["topLeftY"], ann['bottomRightX'],
                ann['bottomRightY']
            ],
                              dtype=np.float32)
            cls_id = int(self.cat_ids[ann['name']])
            ######################################## End of modified Code Block #####################################################

            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)

            ######################################## Start of modified Code Block #####################################################
            _id = int(img_path.split('/')[-1].split('.')[0])
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': _id}
            ######################################## End of modified Code Block #####################################################

            ret['meta'] = meta
        return ret
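As an aside on the cat_spec_wh / cat_spec_mask targets that every example fills: each object row has 2 * num_classes slots, and only the (w, h) pair belonging to the object's class is written. A small self-contained illustration with made-up numbers:

import numpy as np

max_objs, num_classes = 3, 4
cat_spec_wh = np.zeros((max_objs, num_classes * 2), dtype=np.float32)
cat_spec_mask = np.zeros((max_objs, num_classes * 2), dtype=np.uint8)

k, cls_id, wh_k = 0, 2, (10.0, 6.0)  # object 0 belongs to class 2 and is 10x6 on the output map
cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh_k
cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1

print(cat_spec_wh[0])    # [ 0.  0.  0.  0. 10.  6.  0.  0.]
print(cat_spec_mask[0])  # [0 0 0 0 1 1 0 0]
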
Example #23
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        # all anns of one img
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        # height, width
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)  # ori img center

        if self.opt.keep_res:  # False
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            # not keep_res, use opt.input_h, w
            # note: h != w, ori not keep_res, then set w=h=512
            # s = max(img.shape[0], img.shape[1]) * 1.0
            s = np.array([width, height], dtype=np.float32)  # ori img size?
            input_h, input_w = self.opt.input_h, self.opt.input_w

        # flip
        flipped = False

        # get scale and center to do affine transform
        if self.split == 'train':
            # random scale
            if not self.opt.not_rand_crop:
                # train set opt.not_rand_crop=False, so will use default random scale
                # s = s * np.random.choice(np.arange(0.4, 0.6, 0.1))  # (1920,1080) -> (640)
                # note: restrict the img center translate range, lrtb 1/2
                # w_border = self._get_border(img.shape[1] // 4, img.shape[1])
                # h_border = self._get_border(img.shape[0] // 4, img.shape[0])
                # random center, this may translate img so far
                w_range, h_range = img.shape[1] // 8, img.shape[0] // 8
                c[0] = np.random.randint(low=img.shape[1] // 2 - w_range,
                                         high=img.shape[1] // 2 + w_range)
                c[1] = np.random.randint(low=img.shape[0] // 2 - h_range,
                                         high=img.shape[0] // 2 + h_range)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # random flip
            if np.random.random() < self.opt.flip:  # 0.5
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # trans ori img to input size
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # use generated trans_input matrix to trans img
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        # note: see trans img
        # print('scale:', s, 'center:', c)
        # cv2.imwrite('{}_img_trans.png'.format(img_id), inp)
        inp = (inp.astype(np.float32) / 255.)

        # color augment
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

        # normalize
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        # down sample
        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes

        # trans ori img box to output size
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        # draw gaussian core on heatmap
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)  # 20
        # dense or sparse wh regress
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)  # (10,2) sparse!
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)  # dense!
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)  # (10,2)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        # msra, umich
        # opt.mse_loss = False
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else draw_umich_gaussian

        # GT
        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])  # xywh -> x1y1x2y2; shape (4,)
            segmentation = np.array(ann['segmentation'][0]).reshape((-1, 2))  # x,y
            # map ori cat_id (whatever) to [0, num_class-1]
            cls_id = int(self.cat_ids[ann['category_id']])  # self.cat_ids in cigar.py
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1  # [0,2],
                segmentation[:, 0] = width - segmentation[:, 0] - 1  # flip x

            # transform box 2 pts to output
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # x1y1x2y2

            # transform segmentation, just trans polygon_center is enough
            polygon_center = self._get_polygon_center(segmentation)
            polygon_center = affine_transform(polygon_center, trans_output)
            print(polygon_center)

            if h > 0 and w > 0:
                # note: radius generated with spatial extent info from h,w
                radius = gaussian_radius(det_size=(math.ceil(h), math.ceil(w)))
                radius = max(0, int(math.ceil(radius / 3)))
                # radius = max(0, int(radius))
                # opt.mse_loss = False
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                # box center
                box_center = np.array([(bbox[0] + bbox[2]) / 2,
                                       (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                print(box_center)
                # note: change ct to polygon center
                ct = polygon_center
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # label of w,h
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]  # 1D ind of ct position
                # note: update offset
                reg[k] = box_center - ct_int  # float_box_center - int_polygon_center
                print('offset:', reg[k])
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)

                # use box_center to compute box
                ct = box_center.astype(np.int32)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }

        # from utils.plt_utils import plt_heatmaps
        # note: see heatmaps
        # plt_heatmaps(hm, basename='{}_hm'.format(img_id))
        # print(wh)

        if self.opt.dense_wh:  # False
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
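The _get_polygon_center helper used above is not shown in these snippets. A plausible minimal implementation (an assumption; the real one may compute a proper area-weighted centroid) is simply the mean of the polygon vertices:

import numpy as np

def _get_polygon_center(segmentation):
    # segmentation: (N, 2) array of polygon vertices (x, y); use the vertex mean as the center
    return np.asarray(segmentation, dtype=np.float32).mean(axis=0)

poly = np.array([[0, 0], [4, 0], [4, 2], [0, 2]], dtype=np.float32)
print(_get_polygon_center(poly))  # [2. 1.]
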
Example #24
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        # (filepath, tempfilename) = os.path.split(img_path)
        # (filename, extension) = os.path.splitext(tempfilename)
        # kps_path = os.path.join('/media/srt/resource/Halcon_Project/L_shelf_dataset/HG_Dataset/kps',
        #                         filename + '_kps.npy')
        # kps_ann = np.load(kps_path)
        # print('load the kps!!!', kps_path)
        # c3= np.ones(6)
        # kps=np.column_stack((kps_ann,c3))
        # print(kps)

        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)

        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # print('Random Crop Done')
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
                # print the scale value
                # print('s is :',s)
            else:
                # print('Do not Random Crop')
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # apply the affine transform to the input
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)
        # keep a copy of inp for visualization
        # print('type of inp is:',type(inp))
        # print('size of inp is:', inp.shape)
        # for a 3-channel image, a single channel is enough for visualization
        test_image = inp[1]

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2),
                               dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2),
                                 dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                # save the Gaussian heatmap overlay to disk
                # print('shape of hp: ',hm.shape)
                heatmap = np.squeeze(hm)
                heatmap = cv2.resize(heatmap, (960, 640),
                                     interpolation=cv2.INTER_CUBIC)
                new_image = test_image + heatmap * 2
                array_name = 'visual_ann_' + str(index) + '.png'
                matplotlib.image.imsave(array_name, new_image)

                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2:cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2:cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k],
                                   radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
                    1, cls_id
                ])

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh
        }
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({
                'cat_spec_wh': cat_spec_wh,
                'cat_spec_mask': cat_spec_mask
            })
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
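For readers who want to see what draw_umich_gaussian splats onto hm in these examples: it is commonly implemented roughly as in the sketch below (in the spirit of the widely used CenterNet utilities, not guaranteed to match the exact version imported here). A 2D Gaussian of the given radius is merged into the heatmap with an element-wise maximum, so overlapping objects keep the stronger response.

import numpy as np

def gaussian2D(shape, sigma=1):
    m, n = [(s - 1.) / 2. for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0  # zero out negligible tails
    return h

def draw_umich_gaussian(heatmap, center, radius, k=1):
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]
    # clip the Gaussian window at the heatmap borders
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)
    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        # element-wise max keeps the stronger response where objects overlap
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap

hm = np.zeros((8, 8), dtype=np.float32)
draw_umich_gaussian(hm, center=(4, 4), radius=2)
print(hm[4, 4])  # 1.0 at the center, falling off with distance
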
Example #25
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)
        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # TODO: this is where multi-scale training can be changed.
                s = s  # * np.random.choice(np.arange(0.8, 1.5, 0.1))  # changed from 0.6-1.4
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:  # roate aug
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        # The block below estimates the dominant rotation angle.
        # note: it uses angle_list, hm, hm_hp, inp and output_res, which are only defined
        # further down, so as written it would have to run after the ground-truth loop.
        if self.opt.angle_norm and self.split == 'train':
            angle_list = np.array(angle_list) % np.pi  # first normalize angles to [0, pi)
            angle_int = (angle_list // (np.pi / 9)).astype('int')
            angle_b = np.bincount(angle_int)
            index_rot = np.argmax(angle_b)
            ind_rot = (angle_list >
                       (index_rot) * np.pi / 9) * (angle_list <=
                                                   (index_rot + 1) * np.pi / 9)
            angle_rot = np.average(angle_list[ind_rot])
            # Rotate the image, the center heatmap, and the keypoint heatmap.
            angle_img_rot = angle_rot * (-180) / np.pi
            hm_rotate = hm.transpose(1, 2, 0)
            M = cv2.getRotationMatrix2D(
                ((output_res) / 2.0, (output_res) / 2.0), angle_img_rot, 1)
            hm_rotate = cv2.warpAffine(hm_rotate, M, (output_res, output_res))
            hm = hm_rotate.transpose(2, 0, 1)
            hp_rotate = hm_hp.transpose(1, 2, 0)
            hp_rotate = cv2.warpAffine(hp_rotate, M, (output_res, output_res))
            hm_hp = hp_rotate[np.newaxis, :]
            M = cv2.getRotationMatrix2D(
                ((self.opt.input_res) / 2.0, (self.opt.input_res) / 2.0),
                angle_img_rot, 1)
            inp = inp.transpose(1, 2, 0)
            inp = cv2.warpAffine(inp, M,
                                 (self.opt.input_res, self.opt.input_res))
            inp = inp.transpose(2, 0, 1)
            # inp1=cv2.warpAffine(inp1,M,(self.opt.input_res,self.opt.input_res))
            # end of angle normalization

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        # inp1 = cv2.warpAffine(img, trans_input,
        #                      (self.opt.input_res, self.opt.input_res),
        #                      flags=cv2.INTER_LINEAR)
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        angle_list = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            # TODO: change
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'][0:3],
                           np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                #for e in self.flip_idx:
                #pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            #bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]

            h = np.clip(h, 0, output_res - 1)
            w = np.clip(w, 0, output_res - 1)
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w))) * 1.2
                sqrt_wh = np.sqrt(np.sqrt(h * w))
                radius_w = radius * np.sqrt(w) / sqrt_wh
                radius_h = radius * np.sqrt(h) / sqrt_wh
                radius_w = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, np.ceil(radius_w))
                radius_h = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, np.ceil(radius_h))
                # radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)

                ct[0] = np.clip(ct[0], 0, output_res - 1)
                ct[1] = np.clip(ct[1], 0, output_res - 1)

                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                #TODO change
                angle = math.atan2((pts[0, 0] - ct[0]), (pts[0, 1] - ct[1]))
                angle_list.append(angle)
                draw_gaussian(hm[cls_id], ct_int, [radius_w, radius_h, angle])
                # draw_gaussian(hm[cls_id], ct_int, radiusw,radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0

        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }

        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta


        # Debug hook: visualize the generated feature maps / heatmaps.
        # debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
        #                     theme=self.opt.debugger_theme)
        # self.debug(debugger, inp1,  ret)
        return ret
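The variant above draws an anisotropic, rotated peak by passing `[radius_w, radius_h, angle]` to `draw_gaussian`, which the stock `draw_msra_gaussian` / `draw_umich_gaussian` utilities do not accept, so a modified renderer is implied but not shown in this listing. Below is a minimal sketch of what such an elliptical renderer could look like; the function name, the `sigma = diameter / 6` convention, and the max-splatting are assumptions carried over from the usual CenterNet drawing code, not the author's actual implementation.

import numpy as np

# Assumed helper -- not part of the original listing.
def draw_elliptical_gaussian(heatmap, center, radius_w, radius_h, angle, k=1):
    """Splat a rotated, anisotropic Gaussian peak onto `heatmap` (H, W) at `center` (x, y)."""
    height, width = heatmap.shape
    x0, y0 = int(center[0]), int(center[1])
    # Follow the usual CenterNet convention sigma = diameter / 6, per axis.
    sigma_x = max((2 * radius_w + 1) / 6.0, 1e-3)
    sigma_y = max((2 * radius_h + 1) / 6.0, 1e-3)
    r = int(max(radius_w, radius_h))
    y, x = np.ogrid[-r:r + 1, -r:r + 1]
    # Rotate the local coordinate frame by `angle` before evaluating the Gaussian.
    xr = x * np.cos(angle) - y * np.sin(angle)
    yr = x * np.sin(angle) + y * np.cos(angle)
    g = np.exp(-(xr ** 2 / (2 * sigma_x ** 2) + yr ** 2 / (2 * sigma_y ** 2)))
    # Clip the patch to the heatmap borders and splat with an element-wise maximum.
    left, right = min(x0, r), min(width - x0, r + 1)
    top, bottom = min(y0, r), min(height - y0, r + 1)
    patch = heatmap[y0 - top:y0 + bottom, x0 - left:x0 + right]
    np.maximum(patch, k * g[r - top:r + bottom, r - left:r + right], out=patch)
    return heatmap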
Example #26
  def __getitem__(self, index):
    img_id = self.images[index]
    file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
    img_path = os.path.join(self.img_dir, file_name)



    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    anns = self.coco.loadAnns(ids=ann_ids)

    num_objs = min(len(anns), self.max_objs)

    channel_counter = 1  # len(self.coco.getCatIds())

    # target
    # target_img = cv2.imread(img_path)

    N_FRAMES = 11
    middle = N_FRAMES // 2
    # Parse the zero-padded frame number out of the file name, e.g. 'img00042.jpg' -> '00042'.
    frame_str = os.path.basename(img_path).replace('.jpg', '').replace('img', '').replace('.JPEG', '')
    rest = img_path.replace(frame_str + '.jpg', '').replace(os.path.dirname(img_path), '')
    length = len(frame_str)
    modulo = 10 ** length  # wrap-around bound for the zero-padded frame counter

    # Paths of the N_FRAMES neighbouring frames centred on the current one;
    # fall back to the current frame when a neighbour does not exist on disk.
    img_paths = []
    for i in range(N_FRAMES):
      new_img_path = os.path.dirname(img_path) \
                     + rest \
                     + str((int(frame_str) - (i - middle)) % modulo).zfill(length) + '.jpg'
      if not os.path.exists(new_img_path):
        new_img_path = img_path
      img_paths.append(new_img_path)

    imgs = []
    for path in img_paths:
      imgs.append(cv2.imread(path))
    img = np.concatenate(imgs, -1)

    bboxes = {}
    for ann in anns:
      if str(ann['category_id']) in bboxes:
        bboxes[str(ann['category_id'])].append([int(ann['bbox'][0]),
                      int(ann['bbox'][1]),
                     int(ann['bbox'][0] + ann['bbox'][2]),
                      int(ann['bbox'][1] + ann['bbox'][3])])
      else:
        bboxes[str(ann['category_id'])] = [[int(ann['bbox'][0]),
                                     int(ann['bbox'][1]),
                                     int(ann['bbox'][0] + ann['bbox'][2]),
                                     int(ann['bbox'][1] + ann['bbox'][3])]]
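    # Rasterize the boxes into a per-class binary mask, used below as an
    # auxiliary segmentation target ('seg' in the returned dict).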
    seg_img = np.zeros([channel_counter, img.shape[0], img.shape[1]])
    for label in range(1, channel_counter+1):
      if str(label) in bboxes:
        for bbox in bboxes[str(label)]:
          seg_img[label-1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = 255


    height, width = img.shape[0], img.shape[1]
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    if self.opt.keep_res:
      input_h = (height | self.opt.pad) + 1
      input_w = (width | self.opt.pad) + 1
      s = np.array([input_w, input_h], dtype=np.float32)
    else:
      s = max(img.shape[0], img.shape[1]) * 1.0
      input_h, input_w = self.opt.input_h, self.opt.input_w

    flipped = False
    if self.split == 'train':
      if not self.opt.not_rand_crop:
        s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = self._get_border(128, img.shape[1])
        h_border = self._get_border(128, img.shape[0])
        c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
      else:
        sf = self.opt.scale
        cf = self.opt.shift
        c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

      if np.random.random() < self.opt.flip:
        flipped = True

        # target
        # target_img = target_img[:, ::-1, :]

        img = img[:, ::-1, :]
        c[0] =  width - c[0] - 1


    trans_input = get_affine_transform(
      c, s, 0, [input_w, input_h])
    seg_inp = np.zeros((seg_img.shape[0], input_h, input_w))  # (C, H, W); warpAffine returns (H, W)
    for channel in range(seg_img.shape[0]):
      seg_inp[channel, :, :] = cv2.warpAffine(seg_img[channel, :, :], trans_input,
                                              (input_w, input_h),
                                              flags=cv2.INTER_LINEAR)
    # print('pre: ', img.shape)
    # target
    # target_inp = cv2.warpAffine(target_img, trans_input,(input_w, input_h),flags=cv2.INTER_LINEAR)

    inp = np.zeros((input_h, input_w, N_FRAMES*3))  # (H, W, C) to match the warpAffine output
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        inp[:, :, i*3:i*3+3] = cv2.warpAffine(img[:, :, i*3:i*3+3], trans_input,
                              (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
    # print('post: ', inp.shape)
    # target
    # target_inp = (target_inp.astype(np.float32) / 255.)

    inp = (inp.astype(np.float32) / 255.)

    seg_inp = (seg_inp.astype(np.float32) / 255.)  # normalize the segmentation target to [0, 1]


    # print('np.mean(inp), PRE: ', np.mean(inp))
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        if self.split == 'train' and not self.opt.no_color_aug:
          color_aug(self._data_rng, inp[:, :, i*3:i*3+3], self._eig_val, self._eig_vec)
    else:
      if self.split == 'train' and not self.opt.no_color_aug:
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)

    # target
    # color_aug(self._data_rng, target_inp, self._eig_val, self._eig_vec)
    
    # print('np.mean(inp), POST: ', np.mean(inp))
    if inp.shape[2] == N_FRAMES*3:
      for i in range(N_FRAMES):
        inp[:, :, i*3:i*3+3] = (inp[:, :, i*3:i*3+3] - self.mean) / self.std
    else:
      inp = (inp - self.mean) / self.std
    # target
    # target_inp = (target_inp - self.mean) / self.std

    inp = inp.transpose(2, 0, 1)

    # target
    # target_inp = target_inp.transpose(2, 0, 1)

    output_h = input_h // self.opt.down_ratio
    output_w = input_w // self.opt.down_ratio
    num_classes = self.num_classes
    trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    for k in range(num_objs):
      ann = anns[k]
      bbox = self._coco_box_to_bbox(ann['bbox'])
      cls_id = int(self.cat_ids[ann['category_id']])
      if flipped:
        bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_output)
      bbox[2:] = affine_transform(bbox[2:], trans_output)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      if h > 0 and w > 0:
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius
        ct = np.array(
          [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
        ct_int = ct.astype(np.int32)
        draw_gaussian(hm[cls_id], ct_int, radius)
        wh[k] = 1. * w, 1. * h
        ind[k] = ct_int[1] * output_w + ct_int[0]
        reg[k] = ct - ct_int
        reg_mask[k] = 1
        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        if self.opt.dense_wh:
          draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

    # write_hm = cv2.resize(((hm-np.min(hm)/np.max(hm))*255).astype(np.uint8).squeeze(0), (512, 512))
    # cv2.imwrite('/store/datasets/UA-Detrac/test_sample/VID_HM/'  + 'inp_' + os.path.basename(file_name) + '_' + 'HM.jpg', write_hm)

    ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'seg': seg_inp}  # 'seg': np.expand_dims(seg_inp, 0)}
    if self.opt.dense_wh:
      hm_a = hm.max(axis=0, keepdims=True)
      dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
      ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
      del ret['wh']
    elif self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      del ret['wh']
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta

    # if inp.shape[0] == N_FRAMES*3:
    #   for i in range(N_FRAMES):
    #     img_test = (inp[i*3:i*3+3, :, :].transpose(1, 2, 0) * 255).astype(np.uint8)
    #     cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/test_sample/VID_HM/", 'inp_' + os.path.basename(file_name) + '_' + str(i)), img_test)

    #img_test = (target_inp.transpose(1, 2, 0) * 255).astype(np.uint8)
    # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/exp/tensors/VID_HM/", os.path.basename(file_name) + '_target'), img_test)

    # cv2.imwrite(os.path.join("/store/datasets/UA-Detrac/COCO-format/img_tests/", "inp_" + os.path.basename(file_name)).replace('.jpg', '_seg.jpg'), (seg_inp.transpose(1, 2, 0) * 255).astype(np.uint8))

    # exit()
    return ret
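Example #26 stacks N_FRAMES consecutive frames along the channel axis and later splits them back into 3-channel groups for warping, color augmentation, and normalization. The neighbouring-frame paths are derived purely from the zero-padded frame number in the file name; the snippet below replays that logic in isolation (the 'img<number>.jpg' naming and the helper name are assumptions for illustration).

import os

# Self-contained illustration of the neighbouring-frame path construction above.
def neighbour_paths(img_path, n_frames=5):
    middle = n_frames // 2
    frame_str = os.path.basename(img_path).replace('.jpg', '').replace('img', '')
    prefix = img_path[:-len(frame_str + '.jpg')]
    modulo = 10 ** len(frame_str)  # wrap-around bound for the zero-padded counter
    return [prefix + str((int(frame_str) - (i - middle)) % modulo).zfill(len(frame_str)) + '.jpg'
            for i in range(n_frames)]

print(neighbour_paths('clip01/img00042.jpg'))
# ['clip01/img00044.jpg', 'clip01/img00043.jpg', 'clip01/img00042.jpg',
#  'clip01/img00041.jpg', 'clip01/img00040.jpg']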
Example #27
    def __getitem__(self, index):
        # print('--------------->>>> multi pose index',index)
        img_id = self.images[index]
        # print('--------------->>>> multi pose ',img_id)
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_res + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                num_kpts = pts[:, 2].sum()
                if num_kpts == 0:
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int
                            hp_ind[k * num_joints +
                                   j] = pt_int[1] * output_res + pt_int[0]
                            hp_mask[k * num_joints + j] = 1
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'hps': kps,
            'hps_mask': kps_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['hps'], ret['hps_mask']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
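In all of these loaders, `ind[k] = ct_int[1] * output_res + ct_int[0]` stores each object center as a flattened index into the H*W output grid, and `reg_mask` marks which of the `max_objs` slots hold real objects. At training time the network's dense output is typically flattened and gathered at those indices so predictions line up with the `wh` / `reg` / `hps` targets. Below is a small NumPy sketch of that gather, for illustration only; the actual CenterNet training code does the equivalent with torch tensors.

import numpy as np

def gather_at_centers(pred, ind, reg_mask):
    """pred: (C, H, W) dense head output; ind: (max_objs,) flattened center indices;
    reg_mask: (max_objs,) 0/1 validity mask. Returns (num_valid, C) predictions."""
    c, h, w = pred.shape
    flat = pred.reshape(c, h * w).T        # (H*W, C)
    valid = reg_mask.astype(bool)
    return flat[ind[valid]]                # rows at the valid object centers

# e.g. a (2, output_res, output_res) wh head gathered with the `ind` / `reg_mask`
# built above yields (num_objs, 2) predictions aligned with the wh[k] targets.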
Example #28
    def __getitem__(self, index):
        img_path = self.images[index]
        ann_path = img_path.replace('Data', 'Annotations').replace(os.path.splitext(img_path)[-1], '.xml')
        
        anns = []
        root = ET.parse(ann_path).getroot()
        #im_w = int(root.find('size/width').text)
        #im_h = int(root.find('size/height').text)
        for obj in root.findall('object'):
            cls = obj.find('name').text
            if cls not in self.id2idx:
                continue
            cls = self.id2idx[cls]
            x1 = int(obj.find('bndbox/xmin').text)
            y1 = int(obj.find('bndbox/ymin').text)
            x2 = int(obj.find('bndbox/xmax').text)
            y2 = int(obj.find('bndbox/ymax').text)
            #x = 0.5 * (x1 + x2) / im_w
            #y = 0.5 * (y1 + y2) / im_h
            #ww = (x2 - x1) / im_w
            #hh = (y2 - y1) / im_h
            anns.append(np.array([cls, x1, y1, x2, y2], dtype=np.float32))
        #l = np.array(boxes, dtype=np.float32)
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
                c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
                s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = ann[1:]
            cls_id = int(ann[0])
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_path}
            ret['meta'] = meta
        return ret
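Each loader maps box corners (and keypoints) into output-map coordinates with `affine_transform(pt, trans_output)`, where `trans_output` is the 2x3 matrix produced by `get_affine_transform` for the (center, scale, rotation, output size) tuple. The helper itself is just a homogeneous point transform; here is a minimal sketch matching the common CenterNet-style implementation, shown for reference rather than taken from this listing.

import numpy as np

def affine_transform(pt, t):
    """Apply a 2x3 affine matrix `t` to a 2D point `pt` = (x, y)."""
    new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)  # homogeneous coordinates
    return np.dot(t, new_pt)[:2]                             # (2, 3) @ (3,) -> (x', y')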
Example #29
  def __getitem__(self, index):
    img_id = self.images[index]
  
    inp, ann_list, output_w, output_h, meta = self.get_img_ann(index, scale_lv=2)
    
    # TBD: Mosaic augmentation requires large input image size
    # Increase input image size from 512x512 to 800x800 or larger and
    # adjust the scale level to avoid the mosaic boundary to become 
    # a significant boundary of objects
    #inp, ann_list, output_w, output_h, meta = self.mosaic_mix( index )
    
    if False: # Augmentation visualization
      img = inp.transpose(1, 2, 0)
      img = (img*self.std + self.mean)*255
      for an in ann_list:
        bbox, cls_id, bbox2 = an
        bbox = bbox.astype(np.int32)
        bbox2 = bbox2.astype(np.int32)
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, img.shape[1])
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, img.shape[0])
        bbox2[[0, 2]] = np.clip(bbox2[[0, 2]], 0, img.shape[1])
        bbox2[[1, 3]] = np.clip(bbox2[[1, 3]], 0, img.shape[0])
        if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0:
          cv2.rectangle(img, (bbox[0],bbox[1]), (bbox[2],bbox[3]), (255,0,0), 3)
        if bbox2.shape[0] > 0:
          cv2.rectangle(img, (bbox2[0],bbox2[1]), (bbox2[2],bbox2[3]), (0,255,0), 2)
      cv2.imwrite('temp_%d.jpg'%(index),img)
    
    num_objs = min(len(ann_list), self.max_objs)
    num_classes = self.num_classes
    hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
    wh = np.zeros((self.max_objs, 2), dtype=np.float32)
    dense_reg = np.zeros((4, output_h, output_w), dtype=np.float32)
    reg = np.zeros((self.max_objs, 2), dtype=np.float32)
    ind = np.zeros((self.max_objs), dtype=np.int64)
    reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
    dense_wh_mask = np.zeros((4, output_h, output_w), dtype=np.float32)
    cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
    cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8)
    
    draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                    draw_umich_gaussian

    gt_det = []
    xs = np.random.randint(output_w, size=(self.max_objs, 1))
    ys = np.random.randint(output_h, size=(self.max_objs, 1))
    bgs = np.concatenate([xs,ys], axis=1)
    
    for k in range(num_objs):
      bbox, cls_id, bbox2 = ann_list[k]
      
      bbox /= self.opt.down_ratio
      bbox2 /= self.opt.down_ratio

      oh, ow = bbox[3] - bbox[1], bbox[2] - bbox[0]
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      
      if (h/(oh+0.01) < 0.9 or  w/(ow+0.01) < 0.9) and bbox2.shape[0] > 0:
        bbox = bbox2
        bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w)
        bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h)
        h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
      #get center of box
      ct = np.array(
          [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
      ct_int = ct.astype(np.int32)

      if (h > 2 or h/(oh+0.01) > 0.5) and (w > 2 or w/(ow+0.01) > 0.5):
        radius = gaussian_radius((math.ceil(h), math.ceil(w)))
        radius = max(0, int(radius))
        radius = self.opt.hm_gauss if self.opt.mse_loss else radius

        draw_dense_reg(dense_reg, dense_wh_mask, ct_int, bbox, radius)
        draw_gaussian(hm[cls_id], ct_int, radius)

        cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
        cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
        gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 
                       ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
    
    dense_wh  = dense_reg[:2,:,:]
    dense_off = dense_reg[2:,:,:]

    ret = {'input': inp, 'hm': hm, 'dense_wh': dense_wh, 'dense_off': dense_off, 
           'dense_wh_mask': dense_wh_mask[:2]}
    if self.opt.cat_spec_wh:
      ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
      ret.pop('wh', None)  # ret has no 'wh' key in this dense variant; avoid a KeyError
    if self.opt.reg_offset:
      ret.update({'reg': reg})
    if self.opt.debug > 0 or not self.split == 'train':
      gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
               np.zeros((1, 6), dtype=np.float32)
      meta = {'c': meta[0], 's': meta[1], 'gt_det': gt_det, 'img_id': img_id}
      ret['meta'] = meta
    return ret
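Example #29 trains purely dense targets: `dense_reg` holds a 4-channel map (per-pixel width/height plus offsets, later split into `dense_wh` and `dense_off`) and `dense_wh_mask` marks the pixels written by `draw_dense_reg`. Such dense targets are usually supervised with a mask-weighted L1 loss so that only the written pixels contribute; the NumPy sketch below shows that reduction, with the customary `mask.sum() + eps` normalization as an assumption rather than something shown in this listing.

import numpy as np

def masked_l1(pred, target, mask, eps=1e-4):
    """pred, target: (C, H, W) dense maps; mask: (C, H, W) weights in [0, 1].
    Returns the mask-weighted mean absolute error over the written pixels."""
    return np.abs(pred * mask - target * mask).sum() / (mask.sum() + eps)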
Example #30
    def __getitem__(self, index):
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        num_objs = len(anns)
        # num_objs = min(len(anns), self.max_objs)
        if num_objs > self.max_objs:
            num_objs = self.max_objs
            anns = np.random.choice(anns, num_objs, replace=False)  # subsample without duplicates

        img = cv2.imread(img_path)

        img, anns = Data_anchor_sample(img, anns)

        # # for test the keypoint order
        # img1 = cv2.flip(img,1)
        # for ann in anns:
        #   width = img1.shape[1]
        #   bbox = self._coco_box_to_bbox(ann['bbox'])
        #   bbox[[0, 2]] = width - bbox[[2, 0]] - 1
        #   pts = np.array(ann['keypoints'], np.float32).reshape(5, 3)
        #
        #   # for flip
        #   pts[:, 0] = width - pts[:, 0] - 1
        #   for e in self.flip_idx:
        #     pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
        #
        #   # for debug show
        #   def add_coco_bbox(image, bbox, conf=1):
        #     txt = '{}{:.1f}'.format('person', conf)
        #     font = cv2.FONT_HERSHEY_SIMPLEX
        #     cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 255), 2)
        #     cv2.putText(image, txt, (bbox[0], bbox[1] - 2),
        #                 font, 0.5, (0, 255, 0), thickness=1, lineType=cv2.LINE_AA)
        #
        #   def add_coco_hp(image, points, keypoints_prob=1):
        #     for j in range(5):
        #       if keypoints_prob > 0.5:
        #         if j == 0:
        #           cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 255, 0), -1)
        #         elif j == 1:
        #           cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 0, 0), -1)
        #         elif j == 2:
        #           cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 255, 0), -1)
        #         elif j == 3:
        #           cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 255), -1)
        #         elif j == 4:
        #           cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 0), -1)
        #     return image
        #
        #   bbox = [int(x) for x in bbox]
        #   add_coco_bbox(img1, bbox )
        #   add_coco_hp(img1, pts)
        #   cv2.imshow('mat', img1)
        #   cv2.waitKey(5000)

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        rot = 0

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # s = s * np.random.choice(np.arange(0.8, 1.1, 0.1))  # scale jitter disabled in this variant
                # _border = np.random.randint(128*0.4, 128*1.4)
                _border = s * np.random.choice([0.1, 0.2, 0.25])
                w_border = self._get_border(_border, img.shape[1])
                h_border = self._get_border(_border, img.shape[0])
                c[0] = np.random.randint(low=w_border,
                                         high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border,
                                         high=img.shape[0] - h_border)
            else:
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            if np.random.random() < self.opt.aug_rot:
                rf = self.opt.rotate
                rot = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, rot, [self.opt.input_res, self.opt.input_res])
        inp = cv2.warpAffine(img,
                             trans_input,
                             (self.opt.input_res, self.opt.input_res),
                             flags=cv2.INTER_LINEAR)

        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:  # random color augmentation
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
            # inp = Randaugment(self._data_rng, inp, self._eig_val, self._eig_vec)

        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_res = self.opt.output_res
        num_joints = self.num_joints
        trans_output_rot = get_affine_transform(c, s, rot,
                                                [output_res, output_res])
        trans_output = get_affine_transform(c, s, 0, [output_res, output_res])

        hm = np.zeros((self.num_classes, output_res, output_res),
                      dtype=np.float32)
        hm_hp = np.zeros((num_joints, output_res, output_res),
                         dtype=np.float32)
        dense_kps = np.zeros((num_joints, 2, output_res, output_res),
                             dtype=np.float32)
        dense_kps_mask = np.zeros((num_joints, output_res, output_res),
                                  dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        wight_mask = np.ones((self.max_objs), dtype=np.float32)
        kps_mask = np.zeros((self.max_objs, self.num_joints * 2),
                            dtype=np.uint8)
        hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
        hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
        hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)

        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
                        draw_umich_gaussian

        gt_det = []
        for k in range(num_objs):
            ann = anns[k]
            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(ann['category_id']) - 1
            pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                pts[:, 0] = width - pts[:, 0] - 1
                for e in self.flip_idx:
                    pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox = np.clip(bbox, 0, output_res - 1)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0) or (rot != 0):
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = self.opt.hm_gauss if self.opt.mse_loss else max(
                    0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)  # center of the face box
                ct_int = ct.astype(np.int32)  # integer (quantized) center
                # wh[k] = 1. * w, 1. * h  # 2. original CenterNet encoding
                wh[k] = np.log(1. * w / 4), np.log(1. * h / 4)  # 2. log-encoded face box width/height (CenterFace paper)
                ind[k] = ct_int[1] * output_res + ct_int[0]  # index of the face center on the 1/4-resolution feature map
                reg[k] = ct - ct_int  # 3. quantization offset of the face center
                reg_mask[k] = 1  # include this object in the loss
                # if w*h <= 20:
                #     wight_mask[k] = 15

                num_kpts = pts[:, 2].sum()  # number of labelled keypoints
                if num_kpts == 0:  # faces without keypoint labels are treated as hard examples
                    # print('no keypoint annotation')
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    # reg_mask[k] = 0
                    hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
                    # reg_mask[k] = 0

                hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                hp_radius = self.opt.hm_gauss \
                            if self.opt.mse_loss else max(0, int(hp_radius))
                for j in range(num_joints):
                    if pts[j, 2] > 0:
                        pts[j, :2] = affine_transform(pts[j, :2],
                                                      trans_output_rot)
                        if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
                           pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2:j * 2 + 2] = pts[j, :2] - ct_int  # 4. keypoint offset relative to the face center
                            kps_mask[k, j * 2:j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)  # quantized keypoint location
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int  # keypoint quantization offset
                            hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]  # flattened index on the feature map
                            hp_mask[k * num_joints + j] = 1  # mask for the keypoint loss
                            if self.opt.dense_hp:
                                # must be before draw center hm gaussian
                                draw_dense_reg(dense_kps[j],
                                               hm[cls_id],
                                               ct_int,
                                               pts[j, :2] - ct_int,
                                               radius,
                                               is_offset=True)
                                draw_gaussian(dense_kps_mask[j], ct_int,
                                              radius)
                            draw_gaussian(hm_hp[j], pt_int,
                                          hp_radius)  # 1. keypoint Gaussian heatmap
                            if ann['bbox'][2] * ann['bbox'][3] <= 16.0:  # ignore keypoints of faces that are too small
                                kps_mask[k, j * 2:j * 2 + 2] = 0
                draw_gaussian(hm[cls_id], ct_int, radius)
                gt_det.append([
                    ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] +
                    h / 2, 1
                ] + pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
        if rot != 0:
            hm = hm * 0 + 0.9999
            reg_mask *= 0
            kps_mask *= 0
        ret = {
            'input': inp,
            'hm': hm,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'landmarks': kps,
            'hps_mask': kps_mask,
            'wight_mask': wight_mask
        }
        if self.opt.dense_hp:
            dense_kps = dense_kps.reshape(num_joints * 2, output_res,
                                          output_res)
            dense_kps_mask = dense_kps_mask.reshape(num_joints, 1, output_res,
                                                    output_res)
            dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask],
                                            axis=1)
            dense_kps_mask = dense_kps_mask.reshape(num_joints * 2, output_res,
                                                    output_res)
            ret.update({
                'dense_hps': dense_kps,
                'dense_hps_mask': dense_kps_mask
            })
            del ret['landmarks'], ret['hps_mask']  # dense targets replace the sparse keypoint targets ('landmarks' here)
        if self.opt.reg_offset:
            ret.update({'hm_offset': reg})  # quantization offset of the face center
        if self.opt.hm_hp:
            ret.update({'hm_hp': hm_hp})
        if self.opt.reg_hp_offset:
            ret.update({
                'hp_offset': hp_offset,
                'hp_ind': hp_ind,
                'hp_mask': hp_mask
            })
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                     np.zeros((1, 40), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret
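Example #30 encodes the face box size as `wh[k] = (log(w / 4), log(h / 4))` (the CenterFace parameterization) instead of raw (w, h), so at inference the size head has to be exponentiated and rescaled before boxes can be assembled around the decoded centers. Below is a small sketch of that decoding step, consistent with the encoding above; the function name and the constant 4 (mirroring the `/ 4` in the encoding) are assumptions for illustration.

import numpy as np

def decode_centerface_wh(pred_wh):
    """Invert the wh[k] = log(w / 4), log(h / 4) encoding used above.
    pred_wh: (..., 2) predicted size channels, in output-map units."""
    return 4.0 * np.exp(pred_wh)

# A center (cx, cy) on the output map plus the decoded (w, h) then gives the box
# [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2] in output-map coordinates.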