Code Example #1
def augment(img, split):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    # note: this variant only defines input_w/input_h for the non-train split
    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])
        x = 32
        input_w, input_h = int((width / 1. + x - 1) // x * x), int((height / 1. + x - 1) // x * x)

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // tless_config.down_ratio, input_w // tless_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0, [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, center, scale, inp_out_hw
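
Every example on this page maps between the original image, the warped network input, and the downsampled output grid through `data_utils.get_affine_transform` and `data_utils.affine_transform`. Those helpers are not shown here; the following is a minimal CenterNet-style sketch written only to make the `(center, scale, rot, output_size, inv)` convention concrete, and the vendored versions in each project may differ in detail.

import cv2
import numpy as np


def _rotate(point, rad):
    # rotate a 2D point by `rad` radians around the origin
    sn, cs = np.sin(rad), np.cos(rad)
    return np.array([point[0] * cs - point[1] * sn,
                     point[0] * sn + point[1] * cs], dtype=np.float32)


def _get_3rd_point(a, b):
    # third point forming a right angle with the segment a -> b
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_affine_transform(center, scale, rot, output_size, inv=0):
    # sketch of the assumed CenterNet-style helper: map a `scale`-sized square
    # around `center` (optionally rotated) onto an output_size = [w, h] canvas
    if not isinstance(scale, (np.ndarray, list, tuple)):
        scale = np.array([scale, scale], dtype=np.float32)
    src_w, dst_w, dst_h = scale[0], output_size[0], output_size[1]
    rot_rad = np.pi * rot / 180.

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center
    src[1, :] = center + _rotate(np.array([0., -0.5 * src_w], dtype=np.float32), rot_rad)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = [dst_w * 0.5, (dst_h - dst_w) * 0.5]
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        return cv2.getAffineTransform(np.float32(dst), np.float32(src))
    return cv2.getAffineTransform(np.float32(src), np.float32(dst))


def affine_transform(pts, trans):
    # apply a 2x3 affine matrix to an (N, 2) array of points
    pts = np.asarray(pts, dtype=np.float32).reshape(-1, 2)
    pts = np.concatenate([pts, np.ones((len(pts), 1), dtype=np.float32)], axis=1)
    return pts @ trans.T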
Code Example #2
File: snake_voc_utils.py  Project: dpengwen/PCR
def augment(img, split, _data_rng, _eig_val, _eig_vec, mean, std, polys=None):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.uniform(0.6, 1.4)
        x, y = center
        w_border = data_utils.get_border(width / 4, scale[0]) + 1
        h_border = data_utils.get_border(height / 4, scale[0]) + 1
        center[0] = np.random.randint(low=max(x - w_border, 0),
                                      high=min(x + w_border, width - 1))
        center[1] = np.random.randint(low=max(y - h_border, 0),
                                      high=min(y + h_border, height - 1))

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = snake_config.voc_input_h, snake_config.voc_input_w
    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                          dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])
        x = 32
        input_w, input_h = 512, 512
        # input_w, input_h = (width + x - 1) // x * x, (height + x - 1) // x * x

    trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                  [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // snake_config.down_ratio, input_w // snake_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0,
                                                   [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, flipped, center, scale, inp_out_hw
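
Example #2 keeps the randomly drawn crop center a margin away from the image border via `data_utils.get_border` (Examples #14 and #18 call an equivalent module-level `get_border`). A minimal sketch under the assumption that it follows the CenterNet convention `get_border(border, size)`, shrinking the requested border until a valid sampling range remains:

def get_border(border, size):
    # halve the requested border until 2 * border fits inside `size`, so that
    # np.random.randint(border, size - border) has a non-empty range
    i = 1
    while size - border // i <= border // i:
        i *= 2
    return border // i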
Code Example #3
def augment(img, split, _data_rng, _eig_val, _eig_vec, mean, std, polys):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = snake_config.scale
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.uniform(0.4, 1.6)
        seed = np.random.randint(0, len(polys))
        index = np.random.randint(0, len(polys[seed]))
        poly = polys[seed][index]['poly']
        center[0], center[1] = poly[np.random.randint(len(poly))]
        border = scale[0] // 2 if scale[0] < width else width - scale[0] // 2
        center[0] = np.clip(center[0], a_min=border, a_max=width - border)
        border = scale[1] // 2 if scale[1] < height else height - scale[1] // 2
        center[1] = np.clip(center[1], a_min=border, a_max=height - border)

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_w, input_h = snake_config.input_w, snake_config.input_h
    if split != 'train':
        center = np.array([width // 2, height // 2])
        scale = np.array([width, height])
        # input_w, input_h = width, height
        input_w, input_h = int((width / 0.85 + 31) // 32 * 32), int(
            (height / 0.85 + 31) // 32 * 32)

    trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                  [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # data_utils.blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // snake_config.down_ratio, input_w // snake_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0,
                                                   [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, flipped, center, scale, inp_out_hw
Code Example #4
def augment(img, split):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    if split == 'train':
        scale = scale * np.random.uniform(0.6, 1.4)
        center = np.array([0, 0])

        border = scale[0] // 2 if scale[0] < width else width - scale[0] // 2
        border_r = max(width - border, border + 1)
        center[0] = np.random.randint(border, border_r)

        border = scale[1] // 2 if scale[1] < height else height - scale[1] // 2
        border_r = max(height - border, border + 1)
        center[1] = np.random.randint(border, border_r)
        input_w, input_h = input_scale

    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.],
                          dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])
        x = 32
        input_w, input_h = (width + x - 1) // x * x, (height + x - 1) // x * x

    trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                  [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // tless_config.down_ratio, input_w // tless_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0,
                                                   [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, center, scale, inp_out_hw
Code Example #5
    def __getitem__(self, index):
        img = self.imgs[index]
        img_id = os.path.basename(img).replace('_leftImg8bit.png', '')
        img = cv2.imread(img)

        width, height = 2048, 1024
        center = np.array([width // 2, height // 2])
        scale = np.array([width, height])
        # input_w, input_h = width, height
        input_w, input_h = int((width / 0.85 + 31) // 32 * 32), int(
            (height / 0.85 + 31) // 32 * 32)
        trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                      [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        inp = self.normalize_image(inp)
        ret = {'inp': inp}
        meta = {
            'center': center,
            'scale': scale,
            'test': '',
            'img_id': img_id,
            'ann': ''
        }
        ret.update({'meta': meta})

        return ret
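
The test-time `__getitem__` methods (Examples #5, #6 and #9) call `self.normalize_image(inp)` instead of repeating the normalization written out in Examples #1-#4. A hypothetical sketch of that method; the actual `mean`/`std` values are project constants, and the ImageNet statistics below are only placeholders:

import numpy as np

# placeholder statistics; the real values come from the project's config
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)


def normalize_image(inp):
    # HWC uint8 image -> CHW float32 array ready for the network
    inp = inp.astype(np.float32) / 255.
    inp = (inp - mean) / std
    return inp.transpose(2, 0, 1)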
Code Example #6
    def __getitem__(self, index):
        ann = self.anns[index]

        path, img_id = self.process_info(ann)
        img = cv2.imread(path)

        width, height = img.shape[1], img.shape[0]
        center = np.array([width // 2, height // 2])
        scale = np.array([width, height])
        x = 32
        input_w = (int(width / 1.) | (x - 1)) + 1
        input_h = (int(height / 1.) | (x - 1)) + 1
        trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                      [input_w, input_h])
        inp = cv2.warpAffine(img,
                             trans_input, (input_w, input_h),
                             flags=cv2.INTER_LINEAR)

        inp = self.normalize_image(inp)
        ret = {'inp': inp}
        meta = {
            'center': center,
            'scale': scale,
            'test': '',
            'img_id': img_id,
            'ann': ''
        }
        ret.update({'meta': meta})

        return ret
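
Examples #4 and #6 round the test-time input size up for a stride-32 backbone with two slightly different idioms: `(width + x - 1) // x * x` is a plain ceiling to the nearest multiple of 32, while `(int(width) | (x - 1)) + 1` always advances to the next multiple of 32, even when the width is already divisible by 32. A quick check (plain arithmetic, no project code):

x = 32
for width in (500, 512):
    ceil_pad = (width + x - 1) // x * x   # 512 for both inputs
    or_pad = (int(width) | (x - 1)) + 1   # 512 for width=500, but 544 for width=512
    print(width, ceil_pad, or_pad)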
Code Example #7
    def evaluate(self, output, batch):
        detection = output['detection']
        score = detection[:, 4].detach().cpu().numpy()
        label = detection[:, 5].detach().cpu().numpy().astype(int)
        label = snake_cityscapes_utils.continuous_label_to_cityscapes_label(label)
        py = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio

        h, w = batch['inp'].size(2), batch['inp'].size(3)
        center = batch['meta']['center'][0].detach().cpu().numpy()
        scale = batch['meta']['scale'][0].detach().cpu().numpy()
        trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
        py = [data_utils.affine_transform(py_, trans_output_inv) for py_ in py]
        ori_h, ori_w = 1024, 2048
        mask = snake_eval_utils.poly_to_mask(py, label, ori_h, ori_w)

        img_id = batch['meta']['img_id'][0]
        instance_dir = os.path.join(self.instance_dir, img_id)
        os.system('mkdir -p {}'.format(instance_dir))

        self.anns.append(batch['meta']['ann'][0])
        txt_path = os.path.join(self.txt_dir, '{}.txt'.format(img_id))
        with open(txt_path, 'w') as f:
            for i in range(len(label)):
                instance_path = os.path.join(instance_dir, 'instance'+str(i)+'.png')
                cv2.imwrite(instance_path, mask[i])
                instance_path = os.path.join('../mask', img_id, 'instance'+str(i)+'.png')
                f.write('{} {} {}\n'.format(instance_path, label[i], score[i]))
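
Example #7 rasterizes the predicted polygons into per-instance masks with `snake_eval_utils.poly_to_mask` before writing them out for the Cityscapes evaluation script. A plausible sketch using OpenCV; the `labels` argument is accepted only to mirror the call above, and the real helper may encode labels or pixel values differently:

import cv2
import numpy as np


def poly_to_mask(polys, labels, h, w):
    # rasterize each (N, 2) polygon into its own binary instance mask
    masks = []
    for poly in polys:
        mask = np.zeros((h, w), dtype=np.uint8)
        cv2.fillPoly(mask, [np.round(poly).astype(np.int32)], 255)
        masks.append(mask)
    return masks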
Code Example #8
File: snake.py  Project: wolfworld6/snake
    def evaluate(self, output, batch):
        detection = output['detection']
        score = detection[:, 4].detach().cpu().numpy()
        label = detection[:, 5].detach().cpu().numpy().astype(int)
        py = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio

        if len(py) == 0:
            return

        img_id = int(batch['meta']['img_id'][0])
        center = batch['meta']['center'][0].detach().cpu().numpy()
        scale = batch['meta']['scale'][0].detach().cpu().numpy()

        h, w = batch['inp'].size(2), batch['inp'].size(3)
        trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
        img = self.coco.loadImgs(img_id)[0]
        ori_h, ori_w = img['height'], img['width']
        py = [data_utils.affine_transform(py_, trans_output_inv) for py_ in py]
        rles = snake_eval_utils.coco_poly_to_rle(py, ori_h, ori_w)

        coco_dets = []
        for i in range(len(rles)):
            detection = {
                'image_id': img_id,
                'category_id': self.contiguous_category_id_to_json_id[label[i]],
                'segmentation': rles[i],
                'score': float('{:.2f}'.format(score[i]))
            }
            coco_dets.append(detection)

        self.results.extend(coco_dets)
        self.img_ids.append(img_id)
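
Example #8 converts each predicted polygon to a COCO run-length encoding via `snake_eval_utils.coco_poly_to_rle` so it can be scored with `pycocotools`. A plausible sketch, assuming each polygon arrives as an (N, 2) array in original-image coordinates:

import numpy as np
from pycocotools import mask as mask_utils


def coco_poly_to_rle(polys, h, w):
    rles = []
    for poly in polys:
        # frPyObjects expects a list of flattened [x0, y0, x1, y1, ...] polygons
        rle = mask_utils.frPyObjects([np.asarray(poly).ravel().tolist()], h, w)
        rle = mask_utils.merge(rle)
        rle['counts'] = rle['counts'].decode('utf-8')  # make it JSON-serializable
        rles.append(rle)
    return rles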
Code Example #9
File: inference.py  Project: dpengwen/PCR
    def __getitem__(self, index):
        img_path = self.imgs[index]
        img_name = os.path.basename(img_path)
        
        org_img = cv2.imread(img_path)
        
        if not cfg.test.target_scale:
            img = org_img.copy()
            rz_ratio = 1
        else:
            img, rz_ratio = self.resize(org_img, cfg.test.target_scale[0], cfg.test.target_scale[1])

        width, height = img.shape[1], img.shape[0]
        center = np.array([width // 2, height // 2])
        scale = np.array([width, height])
        x = 32
        input_w = (int(width / 1.) | (x - 1)) + 1
        input_h = (int(height / 1.) | (x - 1)) + 1

        trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

        inp = self.normalize_image(inp)
        ret = {'inp': inp}
        meta = {'center': center, 'scale': scale, 'test': '', 'ann': ''}
        ret.update({'meta': meta})
        ret.update({'org_img': org_img})
        ret.update({'rz_img': img})
        ret.update({'rz_ratio': rz_ratio})
        ret.update({'image_name': img_name})
        return ret
Code Example #10
File: res.py  Project: kevin860213/clean-pvnet-1
def crop(img, detection, batch, output):
    img = img[0].detach().cpu().numpy()
    fx_config.max_det = 1
    fx_config.ct_score = 0
    fx_config.down_ratio = 4

    box = output['detection'][0, :fx_config.max_det, :4]
    score = output['detection'][0, :fx_config.max_det, 4]
    box = box[score > fx_config.ct_score]
    box = box.detach().cpu().numpy() * fx_config.down_ratio

    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    trans_output_inv = data_utils.get_affine_transform(center,
                                                       scale,
                                                       0, [w, h],
                                                       inv=1)

    init = [_crop(img, box_, trans_output_inv, output) for box_ in box]
    if len(init) == 0:
        output.update({'inp': [], 'center': [], 'scale': []})
        return []

    inp, center, scale = list(zip(*init))
    inp = torch.cat(inp, dim=0)
    output.update({'inp': inp, 'center': center, 'scale': scale})

    return inp
Code Example #11
def pvnet_transform(img, box):
    center = np.array([(box[0] + box[2]) / 2., (box[1] + box[3]) / 2.],
                      dtype=np.float32)
    scale = max(box[2] - box[0], box[3] - box[1]) * tless_config.scale_ratio

    input_w, input_h = tless_pvnet_utils.input_scale
    trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                  [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    box = np.array(box).reshape(-1, 2)
    box = data_utils.affine_transform(box, trans_input)
    box = magnify_box(box, tless_config.box_ratio, input_h, input_w)
    new_img = np.zeros_like(inp)
    new_img[box[0, 1]:box[1, 1] + 1,
            box[0, 0]:box[1, 0] + 1] = inp[box[0, 1]:box[1, 1] + 1,
                                           box[0, 0]:box[1, 0] + 1]
    inp = new_img

    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    return orig_img, inp, center, scale
Code Example #12
File: snake.py  Project: wolfworld6/snake
    def evaluate(self, output, batch):
        detection = output['detection']
        detection = detection[0] if detection.dim() == 3 else detection
        box = detection[:, :4].detach().cpu().numpy() * snake_config.down_ratio
        score = detection[:, 4].detach().cpu().numpy()
        label = detection[:, 5].detach().cpu().numpy().astype(int)

        img_id = int(batch['meta']['img_id'][0])
        center = batch['meta']['center'][0].detach().cpu().numpy()
        scale = batch['meta']['scale'][0].detach().cpu().numpy()

        if len(box) == 0:
            return

        h, w = batch['inp'].size(2), batch['inp'].size(3)
        trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
        img = self.coco.loadImgs(img_id)[0]
        ori_h, ori_w = img['height'], img['width']

        coco_dets = []
        for i in range(len(label)):
            box_ = data_utils.affine_transform(box[i].reshape(-1, 2), trans_output_inv).ravel()
            box_[2] -= box_[0]
            box_[3] -= box_[1]
            box_ = list(map(lambda x: float('{:.2f}'.format(x)), box_))
            detection = {
                'image_id': img_id,
                'category_id': self.contiguous_category_id_to_json_id[label[i]],
                'bbox': box_,
                'score': float('{:.2f}'.format(score[i]))
            }
            coco_dets.append(detection)

        self.results.extend(coco_dets)
        self.img_ids.append(img_id)
Code Example #13
    def evaluate(self, output, batch):
        img_id = int(batch['meta']['img_id'])
        self.img_ids.append(img_id)
        img_data = self.coco.loadImgs(int(img_id))[0]
        depth_path = img_data['depth_path']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
        annos = self.coco.loadAnns(ann_ids)
        kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]],
                                axis=0)
        corner_3d = np.array(annos[0]['corner_3d'])
        K = np.array(annos[0]['K'])
        pose_gt = [np.array(anno['pose']) for anno in annos]

        kpt_2d = output['kpt_2d'].detach().cpu().numpy()
        centers = batch['meta']['center']
        scales = batch['meta']['scale']
        boxes = batch['meta']['box']
        h, w = batch['inp'].size(2), batch['inp'].size(3)

        pose_preds = []
        pose_preds_icp = []
        for i in range(len(centers)):
            center = centers[i].detach().cpu().numpy()
            scale = scales[i].detach().cpu().numpy()
            kpt_2d_ = kpt_2d[i]
            trans_inv = data_utils.get_affine_transform(center[0],
                                                        scale[0],
                                                        0, [w, h],
                                                        inv=1)
            kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)
            if cfg.test.un_pnp:
                var = output['var'][i].detach().cpu().numpy()
                pose_pred = self.uncertainty_pnp(kpt_3d, kpt_2d_, var, K)
            else:
                pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d_, K)
            pose_preds.append(pose_pred)

            if cfg.test.icp:
                seg = torch.argmax(output['seg'][i],
                                   dim=0).detach().cpu().numpy()
                seg = seg.astype(np.uint8)
                seg = cv2.warpAffine(seg,
                                     trans_inv, (self.width, self.height),
                                     flags=cv2.INTER_NEAREST)
                pose_pred_icp = self.icp_refine(pose_pred.copy(), depth_path,
                                                seg.copy(), K.copy())
                pose_preds_icp.append(pose_pred_icp)

        if cfg.test.icp:
            self.icp_adi.append(self.adi_metric(pose_preds_icp, pose_gt))
            self.icp_cmd5.append(
                self.cm_degree_5_metric(pose_preds_icp, pose_gt))
            self.pose_icp_per_id.append(pose_preds_icp)

        self.adi.append(self.adi_metric(pose_preds, pose_gt))
        self.cmd5.append(self.cm_degree_5_metric(pose_preds, pose_gt))
        self.pose_per_id.append(pose_preds)
Code Example #14
def augment(img, split, down_ratio, _data_rng, _eig_val, _eig_vec, mean, std):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = get_border(128, img.shape[1])
        h_border = get_border(128, img.shape[0])
        center[0] = np.random.randint(low=w_border,
                                      high=img.shape[1] - w_border)
        center[1] = np.random.randint(low=h_border,
                                      high=img.shape[0] - h_border)

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = (512, 512)
    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    # resize output
    output_h = input_h // down_ratio
    output_w = input_w // down_ratio
    trans_output = get_affine_transform(center, scale, 0, [output_w, output_h])

    return orig_img, inp, trans_input, trans_output, input_h, input_w, output_h, output_w, flipped
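
The `color_aug(_data_rng, inp, _eig_val, _eig_vec)` call used in the training branches applies CenterNet-style photometric augmentation in place: random brightness, contrast and saturation jitter plus PCA lighting noise built from the eigenvalues and eigenvectors of the RGB channel covariance. A condensed sketch; the 0.4 jitter range and 0.1 lighting scale are the commonly used defaults, not necessarily each project's values, and the original also shuffles the order of the three jitter steps:

import numpy as np


def color_aug(data_rng, image, eig_val, eig_vec):
    # `image` is a float32 BGR array in [0, 1] and is modified in place
    def blend(alpha, img1, img2):
        img1 *= alpha
        img2 *= (1 - alpha)
        img1 += img2

    gs = image[..., 0] * 0.114 + image[..., 1] * 0.587 + image[..., 2] * 0.299
    gs_mean = gs.mean()

    # brightness, contrast and saturation jitter
    image *= 1. + data_rng.uniform(low=-0.4, high=0.4)
    blend(1. + data_rng.uniform(low=-0.4, high=0.4), image, np.full_like(image, gs_mean))
    blend(1. + data_rng.uniform(low=-0.4, high=0.4), image, gs[..., None].repeat(3, axis=2))

    # PCA lighting noise
    alpha = data_rng.normal(scale=0.1, size=(3,))
    image += np.dot(eig_vec, eig_val * alpha)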
Code Example #15
def pvnet_transform(img, box):
    center = np.array([(box[0] + box[2]) / 2., (box[1] + box[3]) / 2.], dtype=np.float32)
    scale = np.array([box[2] - box[0], box[3] - box[1]], dtype=np.float32) * 1.2

    input_w, input_h = tless_pvnet_utils.input_scale
    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    return orig_img, inp, center, scale
Code Example #16
File: ct_pvnet.py  Project: kevin860213/clean-pvnet-1
    def visualize(self, output, batch, id=0):
        img = batch['img'][0].detach().cpu().numpy()
        center = output['center'][0]
        scale = output['scale'][0]
        h, w = tless_pvnet_utils.input_scale
        trans_output_inv = data_utils.get_affine_transform(center,
                                                           scale,
                                                           0, [w, h],
                                                           inv=1)
        kpt = output['kpt_2d'].detach().cpu().numpy()
        kpt_2d = data_utils.affine_transform(kpt, trans_output_inv)[0]

        img_id = int(batch['img_id'][0])
        anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))[0]
        kpt_3d = np.concatenate([anno['fps_3d'], [anno['center_3d']]], axis=0)
        K = np.array(anno['K'])

        pose_gt = np.array(anno['pose'])
        pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)

        corner_3d = np.array(anno['corner_3d'])
        corner_2d_gt = pvnet_pose_utils.project(corner_3d, K, pose_gt)
        corner_2d_pred = pvnet_pose_utils.project(corner_3d, K, pose_pred)

        _, ax = plt.subplots(1)
        ax.imshow(img)
        ax.add_patch(
            patches.Polygon(xy=corner_2d_gt[[0, 1, 3, 2, 0, 4, 6, 2]],
                            fill=False,
                            linewidth=1,
                            edgecolor='g'))
        ax.add_patch(
            patches.Polygon(xy=corner_2d_gt[[5, 4, 6, 7, 5, 1, 3, 7]],
                            fill=False,
                            linewidth=1,
                            edgecolor='g'))
        ax.add_patch(
            patches.Polygon(xy=corner_2d_pred[[0, 1, 3, 2, 0, 4, 6, 2]],
                            fill=False,
                            linewidth=1,
                            edgecolor='b'))
        ax.add_patch(
            patches.Polygon(xy=corner_2d_pred[[5, 4, 6, 7, 5, 1, 3, 7]],
                            fill=False,
                            linewidth=1,
                            edgecolor='b'))
        plt.show()
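
Examples #13, #16 and #21 recover the object pose from the predicted 2D keypoints with `pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)`. A minimal sketch of such a PnP wrapper built on OpenCV; zero lens distortion and the EPnP solver flag are assumptions, and the project's helper may choose differently:

import cv2
import numpy as np


def pnp(points_3d, points_2d, camera_matrix):
    # return a 3x4 pose matrix [R | t] from 3D-2D keypoint correspondences
    dist_coeffs = np.zeros((4, 1), dtype=np.float64)  # assume no lens distortion
    _, rvec, tvec = cv2.solvePnP(points_3d.astype(np.float64),
                                 points_2d.astype(np.float64),
                                 camera_matrix.astype(np.float64),
                                 dist_coeffs,
                                 flags=cv2.SOLVEPNP_EPNP)
    rotation, _ = cv2.Rodrigues(rvec)
    return np.concatenate([rotation, tvec.reshape(3, 1)], axis=1)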
Code Example #17
File: res.py  Project: kevin860213/clean-pvnet-1
def _crop(img, box, trans_output_inv, output):
    box = data_utils.affine_transform(box.reshape(-1, 2),
                                      trans_output_inv).ravel()
    center = np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])
    scale = max(box[2] - box[0], box[3] - box[1]) * tless_config.scale_ratio

    input_h, input_w = tless_pvnet_utils.input_scale
    trans_input = data_utils.get_affine_transform(center, scale, 0,
                                                  [input_w, input_h])

    img = img.astype(np.uint8).copy()
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - tless_config.mean) / tless_config.std
    inp = inp.transpose(2, 0, 1)
    inp = torch.Tensor(inp).cuda().float()[None]

    init = [inp, center, scale]

    return init
Code Example #18
def augment(img,
            split,
            down_ratio,
            _data_rng,
            _eig_val,
            _eig_vec,
            mean,
            std,
            polys,
            boxes=None,
            label=None):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    scale = 800
    # __import__('ipdb').set_trace()
    # random crop and flip augmentation
    flipped = False
    if cfg.small_num > 0:
        img, polys, boxes, label = small_aug(img, polys, boxes, label,
                                             cfg.small_num)
    if split == 'train':
        scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
        seed = np.random.randint(0, len(polys))
        index = np.random.randint(0, len(polys[seed]))
        x = polys[seed][index]['bbox'][0] + (polys[seed][index]['bbox'][2] -
                                             1) / 2
        y = polys[seed][index]['bbox'][1] + (polys[seed][index]['bbox'][3] -
                                             1) / 2
        w_border = get_border(200, scale)
        h_border = get_border(200, scale)
        if (w_border == 0) or (h_border == 0):
            center[0] = x
            center[1] = y
        else:
            center[0] = np.random.randint(low=max(x - w_border, 0),
                                          high=min(x + w_border, width - 1))
            center[1] = np.random.randint(low=max(y - h_border, 0),
                                          high=min(y + h_border, height - 1))

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = (800, 800)
    if split == 'val':
        center = np.array([1024, 512])
        scale = [2048, 1024]
        input_h, input_w = (1024, 2048)

    # print(center,scale)
    # print(flipped)
    # center = np.array([1272., 718.])
    # scale = 358.4
    # import ipdb; ipdb.set_trace()
    # center = np.array([1583., 306.])
    # print(center)
    # scale = 358.4
    # print(center, scale)
    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img,
                         trans_input, (input_w, input_h),
                         flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    # resize output
    # if split == 'train':
    output_h = input_h // down_ratio
    output_w = input_w // down_ratio
    trans_output = get_affine_transform(center, scale, 0, [output_w, output_h])

    return orig_img, inp, trans_input, trans_output, input_h, input_w, output_h, output_w, flipped, center, scale, \
           polys, boxes, label
Code Example #19
File: inference.py  Project: dpengwen/PCR
def inference():
    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, resume=cfg.resume, epoch=cfg.test.epoch)
    network.eval()

    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)

    dataset = Dataset()
    visualizer = make_visualizer(cfg)
    infer_time_lst = []
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time()-net_time_s

        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)

        if DEBUG:
            print('------------------img_name={}-------------------------'.format(img_name))
            print('org_img.shape:', org_img.shape)
            print('rz_img.shape:',  rz_img.shape)
            print('input-size:({}, {})'.format(h,w))
        
        if cfg.rescore_map_flag:
            rs_thresh = 0.6
            detections = output['detection'].detach().cpu().numpy()
            polys = output['py'][-1].detach().cpu().numpy()
            rs_hm = torch.sigmoid(output['rs_hm']).detach().cpu().numpy()
            if 0:
                print('output.keys:', output.keys())

            rescores = rescoring_polygons(polys, rs_hm)
            conf_keep = np.where(rescores > rs_thresh)[0]
            
            detections = detections[conf_keep]
            pys = [polys[k]* snake_config.down_ratio for k in conf_keep]
            rescores = rescores[conf_keep]        
            
            rs_hm_path = os.path.join(cfg.vis_dir,(img_name[:-4]+'_rs.png'))
            import matplotlib.pyplot as plt
            plt.imshow(rs_hm[0,0,...])
            plt.savefig(rs_hm_path)
            if 0:
                print('detections.shape:', detections.shape)
                print('pys.num:', len(pys))
                print('rs_hm.shape:', rs_hm.shape)
                x = rs_hm[0,0,...]
                import matplotlib.pyplot as plt 
                plt.imshow(x)
                for k in range(len(pys)):
                    plt.plot(pys[k][:,0], pys[k][:, 1])
                plt.savefig('{}.png'.format(img_name[:-4]))
                plt.close()
                np.save('rs_hm.npy', x)
                np.save('pys.npy', np.array(pys))
                exit()
        else:
            detections = output['detection'].detach().cpu().numpy()
            detections[:,:4] = detections[:, :4] * snake_config.down_ratio
            bboxes = detections[:, :4]
            scores = detections[:, 4]
            labels = detections[:, 5].astype(int)
            ex_pts = output['ex'].detach().cpu().numpy()
            ex_pts = ex_pts * snake_config.down_ratio
            #pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
            iter_ply_output_lst = [x.detach().cpu().numpy()* snake_config.down_ratio for x in output['py']]
            pys = iter_ply_output_lst[-1]

            if cfg.vis_intermediate_output != 'none':
                if cfg.vis_intermediate_output == 'htp':
                    xmin,ymin,xmax,ymax = bboxes[:,0::4], bboxes[:,1::4], bboxes[:, 2::4], bboxes[:,3::4]
                    pys = np.hstack((xmin,ymin, xmin,ymax,xmax,ymax,xmax,ymin))
                    pys = pys.reshape(pys.shape[0],4,2)
                elif cfg.vis_intermediate_output == 'otp':
                    pys = ex_pts
                elif cfg.vis_intermediate_output == 'clm_1':
                    pys = iter_ply_output_lst[0]
                elif cfg.vis_intermediate_output == 'clm_2':
                    pys = iter_ply_output_lst[1]
                else:
                    raise ValueError('Not supported type:', cfg.vis_intermediate_output)
                cfg.poly_cls_branch = False


            final_contour_feat = output['final_feat'].detach().cpu().numpy()
            if cfg.poly_cls_branch:
                pys_cls = output['py_cls'][-1].detach().cpu().numpy()
                text_poly_scores = pys_cls[:, 1]
                rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
                detections = detections[rem_ids]
                pys = pys[rem_ids]
                text_poly_scores = text_poly_scores[rem_ids]
                ex_pts = ex_pts[rem_ids]
                final_contour_feat = final_contour_feat[rem_ids]
                if DEBUG:
                    print('py_cls_scores:', text_poly_scores)

            if DEBUG:
                print('dets_num:', len(pys))

        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
            all_boundaries   = [data_utils.affine_transform(py_, trans_output_inv) for py_ in pys]
            bboxes_tmp = [data_utils.affine_transform(det[:4].reshape(-1,2), trans_output_inv).flatten() for det in detections]
            ex_pts_tmp = [data_utils.affine_transform(ep, trans_output_inv) for ep in ex_pts]
            detections = np.hstack((np.array(bboxes_tmp), detections[:,4:]))
            ex_pts = np.array(ex_pts_tmp)

            pp_time_s = time.time()
            #sorting detections by scores
            if cfg.poly_cls_branch:
                detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                  = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
            else:
                detections, ex_points, all_boundaries = sorting_det_results(detections, ex_pts, all_boundaries)
            
            if len(all_boundaries) != 0:
                detections[:,:4] /= rz_ratio
                ex_points /= rz_ratio
                all_boundaries = [poly/rz_ratio for poly in all_boundaries]
                
            if 0:
                import matplotlib.pyplot as plt
                nms_polygons,rem_inds = snake_poly_utils.poly_nms(all_boundaries)
                print('nms_polygons.num:', len(nms_polygons))
                plt.subplot(1,2,1)
                plt = plot_poly(org_img, all_boundaries,scores=scores)
                plt.subplot(1,2,2)
                plt = plot_poly(org_img, nms_polygons)
                plt.savefig('a.png')
                exit()
            
            #nms
            all_boundaries, rem_inds = snake_poly_utils.poly_nms(all_boundaries)
            detections = detections[rem_inds]
            ex_points = ex_points[rem_inds]
            final_contour_feat = final_contour_feat[rem_inds]
            if cfg.poly_cls_branch:
                poly_scores = poly_scores[rem_inds]
            pp_used_time = time.time() - pp_time_s
            infer_time_lst.append([net_used_time, pp_used_time])
            if DEBUG:
                print('infer_time:',[net_used_time, pp_used_time])

            if 0:
                vis_tmp_results(org_img, detections, ex_points, all_boundaries, final_contour_feat, poly_scores, output, indx=img_name[:-4])

        #--------------------------------saving results-------------------------------#
        if cfg.testing_set == 'mlt':
            det_file = os.path.join(cfg.det_dir, ('res_'+img_name[3:-4]+'.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'ic15':
            det_file = os.path.join(cfg.det_dir, ('res_'+img_name[:-4]+'.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'msra':
            det_file = os.path.join(cfg.det_dir, ('res_'+img_name[:-4]+'.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        else: #for arbitrary-shape datasets, e.g., CTW,TOT,ART
            det_file = os.path.join(cfg.det_dir, (img_name[:-4]+'.txt'))
            saving_det_results(det_file, all_boundaries, img=org_img)
        
        continue  # skip the visualization code below
        #------------------------visualizing results---------------------------------#
        ## ~~~~~~ vis-v0 ~~~~~~~ ##
        vis_file = os.path.join(cfg.vis_dir,(img_name[:-4]+'.png'))
        if cfg.testing_set == 'ctw':
            gt_file = os.path.join(cfg.gts_dir, (img_name[:-4]+'.txt'))
            gt_polys = load_ctw_gt_label(gt_file)
        elif cfg.testing_set == 'tot':
            gt_file = os.path.join(cfg.gts_dir, ('poly_gt_'+img_name[:-4]+'.mat'))
            gt_polys = load_tot_gt_label(gt_file)
        elif cfg.testing_set == 'art':
            gt_polys = None
        elif cfg.testing_set == 'msra':
            gt_file = os.path.join(cfg.gts_dir, ('gt_'+img_name[:-4]+'.txt'))
            gt_polys = load_msra_gt_label(gt_file)
        else:
            raise ValueError('Not supported dataset ({}) for visualizing'.format(cfg.testing_set))
        plt = vis_dets_gts(org_img, all_boundaries, gt_polys)
        plt.savefig(vis_file,dpi=600,format='png')
        plt.close()
        ### ~~~~~~~~~ vis-v1 ~~~~~~~~~~~ ###
        # if cfg.poly_cls_branch:
        #     visualizing_det_results(org_img,all_boundaries,vis_file, scores=detections[:,4],poly_scores=poly_scores)
        # else:
        #     visualizing_det_results(org_img,all_boundaries,vis_file, scores=detections[:,4])
        ## vis-v2
        #hm_vis_dir = os.path.join(cfg.vis_dir, ('../vis_hm_on_img_dir'))
        #if not os.path.exists(hm_vis_dir):
        #    os.makedirs(hm_vis_dir)
        #visualizer.visualize(output, batch, os.path.join(hm_vis_dir,(img_name[:-4]+'.png')))

    np.save('infer_time.npy', np.array(infer_time_lst))
Code Example #20
File: inference_fast.py  Project: dpengwen/PCR
def inference():
    network = make_network(cfg).cuda()
    load_network(network,
                 cfg.model_dir,
                 resume=cfg.resume,
                 epoch=cfg.test.epoch)
    network.eval()

    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)

    dataset = Dataset()
    visualizer = make_visualizer(cfg)
    infer_time_lst = []
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time() - net_time_s

        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)

        detections = output['detection'].detach().cpu().numpy()
        detections[:, :4] = detections[:, :4] * snake_config.down_ratio
        bboxes = detections[:, :4]
        scores = detections[:, 4]
        labels = detections[:, 5].astype(int)
        ex_pts = output['ex'].detach().cpu().numpy()
        ex_pts = ex_pts * snake_config.down_ratio
        #pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
        iter_ply_output_lst = [
            x.detach().cpu().numpy() * snake_config.down_ratio
            for x in output['py']
        ]
        pys = iter_ply_output_lst[-1]

        final_contour_feat = output['final_feat'].detach().cpu().numpy()
        if cfg.poly_cls_branch:
            pys_cls = output['py_cls'][-1].detach().cpu().numpy()
            text_poly_scores = pys_cls[:, 1]
            rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
            detections = detections[rem_ids]
            pys = pys[rem_ids]
            text_poly_scores = text_poly_scores[rem_ids]
            ex_pts = ex_pts[rem_ids]
            final_contour_feat = final_contour_feat[rem_ids]

        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            trans_output_inv = data_utils.get_affine_transform(center,
                                                               scale,
                                                               0, [w, h],
                                                               inv=1)
            all_boundaries = [
                data_utils.affine_transform(py_, trans_output_inv)
                for py_ in pys
            ]
            bboxes_tmp = [
                data_utils.affine_transform(det[:4].reshape(-1, 2),
                                            trans_output_inv).flatten()
                for det in detections
            ]
            ex_pts_tmp = [
                data_utils.affine_transform(ep, trans_output_inv)
                for ep in ex_pts
            ]
            detections = np.hstack((np.array(bboxes_tmp), detections[:, 4:]))
            ex_pts = np.array(ex_pts_tmp)

            pp_time_s = time.time()
            #sorting detections by scores
            if cfg.poly_cls_branch:
                detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                  = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
            else:
                detections, ex_points, all_boundaries = sorting_det_results(
                    detections, ex_pts, all_boundaries)

            if cfg.rle_nms:
                tmp_polys = all_boundaries.copy()
                #all_boundaries, rem_inds = snake_poly_utils.poly_nms(tmp_polys)
                rem_inds = poly_rle_nms(tmp_polys,
                                        detections[:, -1], (h, w),
                                        nms_thresh=0.3)
                all_boundaries = [all_boundaries[idx] for idx in rem_inds]
            else:
                #nms
                all_boundaries, rem_inds = snake_poly_utils.poly_nms(
                    all_boundaries)
            detections = detections[rem_inds]
            ex_points = ex_points[rem_inds]
            final_contour_feat = final_contour_feat[rem_inds]
            if cfg.poly_cls_branch:
                poly_scores = poly_scores[rem_inds]
            pp_used_time = time.time() - pp_time_s
            infer_time_lst.append([net_used_time, pp_used_time])

            if len(all_boundaries) != 0:
                detections[:, :4] /= rz_ratio
                ex_points /= rz_ratio
                all_boundaries = [poly / rz_ratio for poly in all_boundaries]

        #--------------------------------saving results-------------------------------#
        det_file = os.path.join(cfg.det_dir, (img_name[:-4] + '.txt'))
        saving_det_results(det_file, all_boundaries, img=org_img)
Code Example #21
File: pvnet.py  Project: kevin860213/clean-pvnet-1
    def visualize(self, output, batch):
        img_id = int(batch['meta']['img_id'])
        img_data = self.coco.loadImgs(int(img_id))[0]
        path = img_data['file_name']
        depth_path = img_data['depth_path']
        img = np.array(Image.open(path))

        ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
        annos = self.coco.loadAnns(ann_ids)
        kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]],
                                axis=0)
        corner_3d = np.array(annos[0]['corner_3d'])
        K = np.array(annos[0]['K'])

        kpt_2d = output['kpt_2d'].detach().cpu().numpy()
        centers = batch['meta']['center']
        scales = batch['meta']['scale']
        boxes = batch['meta']['box']
        h, w = batch['inp'].size(2), batch['inp'].size(3)

        kpt_2ds = []
        segs = []
        for i in range(len(centers)):
            center = centers[i].detach().cpu().numpy()
            scale = scales[i].detach().cpu().numpy()
            kpt_2d_ = kpt_2d[i]
            trans_inv = data_utils.get_affine_transform(center[0],
                                                        scale[0],
                                                        0, [w, h],
                                                        inv=1)
            kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)
            kpt_2ds.append(kpt_2d_)

            seg = torch.argmax(output['seg'][i], dim=0).detach().cpu().numpy()
            seg = seg.astype(np.uint8)
            seg = cv2.warpAffine(seg,
                                 trans_inv, (720, 540),
                                 flags=cv2.INTER_NEAREST)
            segs.append(seg)

        _, ax = plt.subplots(1)
        ax.imshow(img)

        # for i in range(len(boxes)):
        #     x_min, y_min, x_max, y_max = boxes[i].view(-1).numpy()
        #     ax.plot([x_min, x_min, x_max, x_max, x_min], [y_min, y_max, y_max, y_min, y_min])

        depth = np.array(Image.open(depth_path)).astype(np.float32)

        for i, kpt_2d in enumerate(kpt_2ds):
            pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)

            mask = segs[i]
            box = cv2.boundingRect(mask.astype(np.uint8))
            x, y = box[0] + box[2] / 2., box[1] + box[3] / 2.
            z = np.mean(depth[mask != 0] / 10000.)
            x = ((x - K[0, 2]) * z) / float(K[0, 0])
            y = ((y - K[1, 2]) * z) / float(K[1, 1])
            center = [x, y, z]

            # pose_pred[:, 3] = center

            corner_2d_pred = pvnet_pose_utils.project(corner_3d, K, pose_pred)
            ax.add_patch(
                patches.Polygon(xy=corner_2d_pred[[0, 1, 3, 2, 0, 4, 6, 2]],
                                fill=False,
                                linewidth=1,
                                edgecolor='b'))
            ax.add_patch(
                patches.Polygon(xy=corner_2d_pred[[5, 4, 6, 7, 5, 1, 3, 7]],
                                fill=False,
                                linewidth=1,
                                edgecolor='b'))

        for anno in annos:
            pose_gt = np.array(anno['pose'])
            corner_2d_gt = pvnet_pose_utils.project(corner_3d, K, pose_gt)
            ax.add_patch(
                patches.Polygon(xy=corner_2d_gt[[0, 1, 3, 2, 0, 4, 6, 2]],
                                fill=False,
                                linewidth=1,
                                edgecolor='g'))
            ax.add_patch(
                patches.Polygon(xy=corner_2d_gt[[5, 4, 6, 7, 5, 1, 3, 7]],
                                fill=False,
                                linewidth=1,
                                edgecolor='g'))

        plt.show()
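
The ground-truth and predicted boxes drawn above come from `pvnet_pose_utils.project(corner_3d, K, pose)`, which projects the 3D bounding-box corners into the image. A minimal sketch of the standard pinhole projection this presumably implements:

import numpy as np


def project(xyz, K, RT):
    # project (N, 3) object-space points with pose RT (3x4) and intrinsics K (3x3)
    xyz = np.dot(xyz, RT[:, :3].T) + RT[:, 3:].T   # to camera coordinates
    xyz = np.dot(xyz, K.T)                         # apply intrinsics
    return xyz[:, :2] / xyz[:, 2:]                 # perspective divide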