Example #1
def expand_feature(center_x, center_y, box_width, box_height, feature_width,
                   feature_height):
    # Blank single-channel heatmap at the feature-map resolution.
    hm = np.zeros((feature_height, feature_width), dtype=np.float32)
    # Gaussian radius derived from the box size (CenterNet-style helper).
    radius = int(gaussian_radius((box_width, box_height)))
    # Integer centre location on the feature map.
    ct = np.array([center_x, center_y], dtype=np.float32)
    ct_int = ct.astype(np.int32)
    draw_umich_gaussian(hm, ct_int, radius)
    return hm
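All of the examples on this page lean on CenterNet-style helpers (`gaussian_radius`, `draw_umich_gaussian`). As a point of reference, here is a minimal, self-contained sketch of the splat such a drawing helper performs; `splat_gaussian` and its sigma choice are illustrative assumptions, not the original utility code.

import numpy as np

def splat_gaussian(heatmap, center, radius, k=1.0):
    # Write an unnormalized 2D Gaussian around `center` into `heatmap`,
    # keeping the element-wise maximum so overlapping objects do not erase
    # each other (the same idea the draw_*_gaussian helpers implement).
    sigma = (2 * radius + 1) / 6.0
    x0, y0 = int(center[0]), int(center[1])
    h, w = heatmap.shape
    xs = np.arange(max(0, x0 - radius), min(w, x0 + radius + 1))
    ys = np.arange(max(0, y0 - radius), min(h, y0 + radius + 1))
    if xs.size == 0 or ys.size == 0:
        return heatmap
    gx = np.exp(-((xs - x0) ** 2) / (2 * sigma ** 2))
    gy = np.exp(-((ys - y0) ** 2) / (2 * sigma ** 2))
    patch = k * np.outer(gy, gx)
    region = heatmap[ys[0]:ys[-1] + 1, xs[0]:xs[-1] + 1]
    np.maximum(region, patch, out=region)
    return heatmap

hm_demo = np.zeros((96, 96), dtype=np.float32)
splat_gaussian(hm_demo, center=(48, 48), radius=6)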
Example #2
def guassian(mask):
    # Resize the mask to the output resolution and binarise it.
    mask = imresize(mask, (119, 119), interp='nearest')
    mask = np.array(mask, dtype=np.int32)
    mask[mask != 0] = 1
    # Bounding box of the mask foreground, used to size the Gaussian.
    bbox = extract_bboxes(mask)
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
    ct_int = ct.astype(np.int32)
    output_h, output_w = mask.shape[0], mask.shape[1]
    hm = np.zeros((1, output_h, output_w), dtype=np.float32)
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius))
    # print(radius)
    # wh[k] = 1. * w, 1. * h
    # ind[k] = ct_int[1] * output_w + ct_int[0]
    # reg[k] = ct - ct_int
    # reg_mask[k] = 1
    hm[0] = draw_umich_gaussian(hm[0], ct_int, radius)
    return hm
Example #3
    def __getitem__(self, index):
        if os.path.exists(self.imgs_path[index]):
            img = cv2.imread(self.imgs_path[index])
        else:
            raise FileNotFoundError("%s does not exist" % self.imgs_path[index])
        anns = np.array(self.words[index])
        bboxes = anns[:, :4]
        bboxes = np.array([self._coco_box_to_bbox(bb) for bb in bboxes])
        lms = np.zeros((anns.shape[0], 10), dtype=np.float32)
        if self.split == "train":
            for idx, ann in enumerate(anns):
                lm = np.zeros(10, dtype=np.float32) - 1
                if ann[4] >= 0:
                    for i in range(5):

                        lm[i * 2] = ann[4 + 3 * i]
                        lm[i * 2 + 1] = ann[4 + 3 * i + 1]
                lms[idx] = lm
        num_objs = min(len(anns), self.max_objs)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.default_resolution[0], self.default_resolution[
            1]

        flipped = False
        if self.split == 'train':
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)
            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        inp1 = inp.copy()
        inp = (inp.astype(np.float32) / 255.)
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.down_ratio
        output_w = input_w // self.down_ratio

        num_classes = 1
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

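        # Per-object training targets: centre heatmap, box size (wh), centre
        # offset (reg), flattened centre index (ind), and the landmark targets.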
        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)

        landmarks = np.zeros((self.max_objs, 10), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        lm_reg = np.zeros((self.max_objs, 10), dtype=np.float32)
        lm_ind = np.zeros((self.max_objs), dtype=np.int64)
        lm_mask = np.zeros((self.max_objs), dtype=np.uint8)

        gt_det = []
        cls_id = 0
        for k in range(num_objs):
            flag_lm = False
            bbox = bboxes[k]
            lm = lms[k]
            bbox1 = bbox.copy()
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                if lm[0] >= 0:
                    lm[0::2] = width - lm[0::2] - 1
                    l_tmp = lm.copy()
                    lm[0:2] = l_tmp[2:4]
                    lm[2:4] = l_tmp[0:2]
                    lm[6:8] = l_tmp[8:10]
                    lm[8:10] = l_tmp[6:8]

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            if lm[0] >= 0:
                lm[:2] = affine_transform(lm[:2], trans_output)
                lm[2:4] = affine_transform(lm[2:4], trans_output)
                lm[4:6] = affine_transform(lm[4:6], trans_output)
                lm[6:8] = affine_transform(lm[6:8], trans_output)
                lm[8:10] = affine_transform(lm[8:10], trans_output)

            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                draw_umich_gaussian(hm[cls_id], ct_int, radius)

                wh[k] = 1. * w, 1. * h

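                # flattened row-major centre index into the output map: ind = cy * output_w + cx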
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                if lm[0]>0 and lm[1]< output_h and lm[2] < output_w and lm[3] < output_h \
                    and lm[6] > 0 and lm[7] > 0 and lm[8] < output_w and lm[9] > 0:

                    lm_ind[k] = ct_int[1] * output_w + ct_int[0]
                    if h * w > 10:
                        lm_mask[k] = 1

                    lm_temp = lm.copy()
                    lm_int = lm_temp.astype(np.int32)
                    lm_reg[k] = lm_temp - lm_int
                    lm_temp[[0, 2, 4, 6,
                             8]] = lm_temp[[0, 2, 4, 6, 8]] - ct_int[0]
                    lm_temp[[1, 3, 5, 7,
                             9]] = lm_temp[[1, 3, 5, 7, 9]] - ct_int[1]
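                    # store the five landmarks as offsets from the integer box
                    # centre (in output-map pixels); lm_reg keeps the sub-pixel remainder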
                    landmarks[k] = lm_temp
                gt_det.append([
                    4 * (ct[0] - w / 2), 4 * (ct[1] - h / 2),
                    4 * (ct[0] + w / 2), 4 * (ct[1] + h / 2)
                ])
        # if self.debug :# and ("COCO" in str(self.imgs_path[files_index])):
        #     print(len(lms), len(bboxes))
        #     import matplotlib
        #     matplotlib.use('Agg')
        #     import matplotlib.pyplot as plt
        #     for lm, bb in zip(lms, bboxes):
        #         plt.figure(figsize=(50, 50))

        #         if bb[3] - bb[1] > 0 and bb[2] - bb[0] and np.array(np.where(lm > 0)).shape[1] ==10:
        #             cv2.circle(inp1, (int(lm[0]), int(lm[1])), 2, (255, 0, 0), -1)
        #             cv2.circle(inp1, (int(lm[2]), int(lm[3])), 2, (255, 255, 0), -1)
        #             cv2.circle(inp1, (int(lm[4]), int(lm[5])), 2, (255, 155, 155), -1)
        #             cv2.circle(inp1, (int(lm[6]), int(lm[7])), 2, (255, 0, 255), -1)
        #             cv2.circle(inp1, (int(lm[8]), int(lm[9])), 2, (65, 86, 255), -1)
        #             plt.plot(bb[[0, 2, 2, 0, 0]].T, bb[[1, 1, 3, 3, 1]].T, '.-')
        #     plt.imshow(inp1)
        #     plt.axis('off')
        #     plt.savefig('debug/_after%s'%self.imgs_path[index].split("/")[-1])
        #     time.sleep(10)

        ret = {
            'input': inp,
            'hm': hm,
            'lm': landmarks,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg,
            'lm_ind': lm_ind,
            'lm_mask': lm_mask
        }

        if self.split != 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                   np.zeros((1, 4), dtype=np.float32)
            meta = {'gt_det': gt_det}
            ret['meta'] = meta
        return ret
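The `ind`/`reg_mask` (and `lm_ind`/`lm_mask`) targets built above encode each object's centre as a flattened row-major index into the output map. Below is a small sketch of how such indices are typically consumed to read per-object predictions off a dense head; `gather_at_centers` is a hypothetical helper written here for illustration, not part of the dataset code.

import numpy as np

def gather_at_centers(feat, ind):
    # feat: (C, H, W) dense prediction map; ind: (num_objs,) flattened centre
    # indices (cy * W + cx). Returns a (num_objs, C) array of per-object values.
    c, h, w = feat.shape
    flat = feat.reshape(c, h * w)
    return flat[:, ind].T

wh_pred = np.random.rand(2, 4, 4).astype(np.float32)      # toy (C=2, H=4, W=4) map
centre_inds = np.array([1 * 4 + 2, 3 * 4 + 0])             # centres (x=2, y=1) and (x=0, y=3)
per_object_wh = gather_at_centers(wh_pred, centre_inds)    # shape (2, 2)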
Example #4
    #     return gaussian_map
    # gauss = generate_gaussian_map(bbox, mask.shape)
    # cv2.imwrite('/Users/liudaizong/Downloads/00000_0.png', gauss*255)
    # bb
    h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
    ct = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
    ct_int = ct.astype(np.int32)
    output_h, output_w = mask.shape[0], mask.shape[1]
    hm = np.zeros((1, output_h, output_w), dtype=np.float32)
    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
    radius = max(0, int(radius)*2)
    # wh[k] = 1. * w, 1. * h
    # ind[k] = ct_int[1] * output_w + ct_int[0]
    # reg[k] = ct - ct_int
    # reg_mask[k] = 1
    draw_umich_gaussian(hm[0], ct_int, radius)
    cv2.imwrite('/Users/liudaizong/Downloads/00000.png', hm[0]*255)
    img = cv2.imread('/Users/liudaizong/Downloads/DAVIS2016/JPEGImages/480p/camel/00046.jpg')
    cv2.imwrite('/Users/liudaizong/Downloads/00000.jpg', hm[0][:,:,None]*img)

    import torch
    scale=22
    x = torch.arange(0., int(output_w), 1)
    y = torch.arange(0., int(output_h), 1).unsqueeze(-1)
    center_x, center_y = torch.from_numpy(ct_int)
    gauss = torch.exp(-((x - center_x) ** 2 + (y - center_y) ** 2) / 2.0 / scale / scale)
    cv2.imwrite('/Users/liudaizong/Downloads/00001.png', gauss.numpy()*255)
    bb  # undefined name: acts as a crude breakpoint that halts execution here
    # import custom_transforms as tr
    # import torch
    # from torchvision import transforms
    def __getitem__(self, index):
        if os.path.exists(self.imgs_path[index]):
            img = cv2.imread(self.imgs_path[index])
        else:
            raise FileNotFoundError("%s does not exist" % self.imgs_path[index])
        anns = self.words[index]
        num_objs = min(len(anns), self.max_objs)
        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        s = max(img.shape[0], img.shape[1]) * 1.0
        input_h, input_w = self.default_resolution[0], self.default_resolution[
            1]

        flipped = False
        if self.split == 'train':
            s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
            w_border = self._get_border(128, img.shape[1])
            h_border = self._get_border(128, img.shape[0])
            c[0] = np.random.randint(low=w_border,
                                     high=img.shape[1] - w_border)
            c[1] = np.random.randint(low=h_border,
                                     high=img.shape[0] - h_border)

            if np.random.random() < 0.5:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        # if self.split == 'train':
        color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.down_ratio
        output_w = input_w // self.down_ratio
        num_classes = 1
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        landmarks = np.zeros((self.max_objs, 10), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)

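        # Select the Gaussian renderer: MSRA-style splat when training with an
        # MSE heatmap loss, otherwise the UMich/CenterNet max-splat used above.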
        draw_gaussian = draw_msra_gaussian if self.mse_loss else \
                                        draw_umich_gaussian

        cls_id = 0
        for k in range(num_objs):
            ann = anns[k]
            bbox = np.array(ann[:4].copy())
            x_o, y_o, w_o, h_o = ann[0], ann[1], ann[2], ann[3]

            bbox = self._coco_box_to_bbox(bbox)
            lm = []
            for i in range(5):
                if self.split == 'train' and ann[4] > 0:
                    x = (ann[4 + 3 * i] - x_o) / (w_o + 1e-14)
                    y = (ann[4 + 3 * i + 1] - y_o) / (h_o + 1e-14)
                    _lm = [x, y]
                else:
                    _lm = [0, 0]
                lm.append(_lm)
            lm = np.array(lm).reshape(1, -1)[0]
            if flipped:
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1

            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if h > 0 and w > 0:

                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                ct = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
                ct_int = ct.astype(np.int32)

                draw_gaussian(hm[cls_id], ct_int, radius)  # use the renderer selected above
                wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + ct_int[0]
                reg[k] = ct - ct_int
                reg_mask[k] = 1

                landmarks[k] = lm

        ret = {
            'input': inp,
            'hm': hm,
            'lm': landmarks,
            'reg_mask': reg_mask,
            'ind': ind,
            'wh': wh,
            'reg': reg
        }

        if self.split != 'train':
            gt_det = np.zeros((self.max_objs, 4), dtype=np.float32)
            for k in range(num_objs):
                ann = anns[k]
                bbox = np.array(ann[:4].copy())
                bbox = self._coco_box_to_bbox(bbox)
                gt_det[k] = bbox
            meta = {'gt_det': gt_det, 'h': height, 'w': width}
            ret['meta'] = meta
        return ret
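Unlike the previous example, the landmarks here are stored normalized to the original annotation box: lm_x = (x - x_o) / w_o and lm_y = (y - y_o) / h_o. A hedged sketch of the inverse mapping follows, assuming a box in the same (x, y, w, h) format; `denormalize_landmarks` is an illustrative name, not part of the dataset class.

import numpy as np

def denormalize_landmarks(lm, box_xywh):
    # lm: (10,) normalized landmarks [x1, y1, ..., x5, y5]; box_xywh: (x, y, w, h).
    x_o, y_o, w_o, h_o = box_xywh
    abs_lm = np.asarray(lm, dtype=np.float32).copy()
    abs_lm[0::2] = abs_lm[0::2] * w_o + x_o   # x coordinates back to image space
    abs_lm[1::2] = abs_lm[1::2] * h_o + y_o   # y coordinates back to image space
    return abs_lm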
Example #6
  def __getitem__(self, index): 
    img_id = self.images[index] # id
    img_path = os.path.join(self.img_dir, self.coco.loadImgs(ids=[img_id])[0]['file_name']) 
    ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 
    annotations = self.coco.loadAnns(ids=ann_ids) # label
    labels = np.array([self.cat_ids[anno['category_id']] for anno in annotations]) 
    bboxes = np.array([anno['bbox'] for anno in annotations], dtype=np.float32) 
    if len(bboxes) == 0:
      bboxes = np.array([[0., 0., 0., 0.]], dtype=np.float32)
      labels = np.array([[0]])
    bboxes[:, 2:] += bboxes[:, :2]  # x1 y1 w h to x1 y1 x2 y2

    img = cv2.imread(img_path)
    height, width = img.shape[0], img.shape[1]
    center = np.array([width / 2., height / 2.], dtype=np.float32)  # center of image 
    scale = max(height, width) * 1.0

    flipped = False 

    trans_img = get_affine_transform(center, scale, 0, [self.img_size['w'], self.img_size['h']]) 
    img = cv2.warpAffine(img, trans_img, (self.img_size['w'], self.img_size['h'])) 

    img = img.astype(np.float32) / 255. # [0,1]

    img = img.transpose(2, 0, 1)  # from [H, W, C] to [C, H, W]

    # Ground Truth heatmap 
    trans_fmap = get_affine_transform(center, scale, 0, [self.fmap_size['w'], self.fmap_size['h']]) 

    # vectors
    hmap = np.zeros((self.num_classes, self.fmap_size['h'], self.fmap_size['w']), dtype=np.float32)  # heatmap,size(3,96,96)
    w_h_ = np.zeros((self.max_objs, 2), dtype=np.float32)  # width and height
    pxpy = np.zeros((self.max_objs, 2), dtype=np.float32)  # half-diagonal length d and angle theta
    regs = np.zeros((self.max_objs, 2), dtype=np.float32)  # regression

    # index
    inds = np.zeros((self.max_objs,), dtype=np.int64)
    ind_masks = np.zeros((self.max_objs,), dtype=np.uint8)

    # detections = []
    for k, (bbox, label) in enumerate(zip(bboxes, labels)): 
      #if flipped:
      #  bbox[[0, 2]] = width - bbox[[2, 0]] - 1
      bbox[:2] = affine_transform(bbox[:2], trans_fmap) 
      bbox[2:] = affine_transform(bbox[2:], trans_fmap)
      bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.fmap_size['w'] - 1) 
      bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.fmap_size['h'] - 1)
      h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]  # box height and width on the feature map

      if h > 0 and w > 0:
        # half of the box diagonal and its angle; computed inside the validity
        # check to avoid a division by zero when the clipped box has w == 0
        d = math.sqrt(h * h + w * w) / 2
        theta = math.pi - math.atan(h / w)
        obj_c = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 
        obj_c_int = obj_c.astype(np.int32) 

        radius = max(0, int(gaussian_radius((math.ceil(h), math.ceil(w)), self.gaussian_iou))) # gaussian_radius
        draw_umich_gaussian(hmap[label], obj_c_int, radius) 
        w_h_[k] = 1. * w, 1. * h
        pxpy[k] = 1. * d, 1. * theta 
        regs[k] = obj_c - obj_c_int  # discretization error 
        inds[k] = obj_c_int[1] * self.fmap_size['w'] + obj_c_int[0] # = fmap_w * cy + cx 
        ind_masks[k] = 1 

    return {'image': img,
            'hmap': hmap, 'w_h_':w_h_,'pxpy': pxpy, 'regs': regs, 'inds': inds, 'ind_masks': ind_masks,
            'c': center, 's': scale, 'img_id': img_id}
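The `pxpy` target in this example stores half the box diagonal, d = sqrt(h^2 + w^2) / 2, together with the angle theta = pi - atan(h / w). A small sketch of the inverse (recovering w and h from a predicted d and theta), for illustration only:

import math

def diag_angle_to_wh(d, theta):
    # Invert d = sqrt(h^2 + w^2) / 2 and theta = pi - atan(h / w) for h, w > 0.
    phi = math.pi - theta            # phi = atan(h / w), lies in (0, pi/2)
    h = 2.0 * d * math.sin(phi)
    w = 2.0 * d * math.cos(phi)
    return w, h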