def get_seg_masks(self, pts_score, det_pts, det_bboxes, det_labels, test_cfg,
                  ori_shape, scale_factor, rescale=False):
    """Get segmentation masks from points and scores.

    Args:
        pts_score (Tensor or ndarray): shape (n, num_pts)
        det_pts (Tensor): shape (n, num_pts*2)
        det_bboxes (Tensor): shape (n, 4)
        det_labels (Tensor): shape (n, 1)
        test_cfg (dict): rcnn testing config
        ori_shape: original image size
        scale_factor: scale factor for image
        rescale: whether to rescale to original size

    Returns:
        list[list]: encoded masks
    """
    cls_segms = [[] for _ in range(self.bbox_head.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
        im_pts = det_pts[i].clone()
        im_pts = im_pts.reshape(-1, 2)
        im_pts_score = pts_score[i]

        # shift points into the box-local coordinate frame
        im_pts[:, 0] = im_pts[:, 0] - bbox[0]
        im_pts[:, 1] = im_pts[:, 1] - bbox[1]
        _h, _w = h, w
        # anchor the corners with zero score so the interpolation
        # covers the whole box
        corner_pts = im_pts.new_tensor([[0, 0], [_h - 1, 0], [0, _w - 1],
                                        [_w - 1, _h - 1]])
        corner_score = im_pts_score.new_tensor([0, 0, 0, 0])
        im_pts = torch.cat([im_pts, corner_pts], dim=0).cpu().numpy()
        im_pts_score = torch.cat([im_pts_score, corner_score],
                                 dim=0).cpu().numpy()

        # densify the sparse point scores onto the box grid
        grids = tuple(np.mgrid[0:_w:1, 0:_h:1])
        bbox_mask = scipy.interpolate.griddata(im_pts, im_pts_score, grids)
        bbox_mask = bbox_mask.transpose(1, 0)
        bbox_mask = mmcv.imresize(bbox_mask, (w, h))
        bbox_mask = bbox_mask.astype(np.float32)
        bbox_mask[np.isnan(bbox_mask)] = 0
        bbox_mask = (bbox_mask > test_cfg.get('pts_score_thr',
                                              0.5)).astype(np.uint8)

        im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        rle = maskUtils.encode(
            np.array(im_mask[:, :, np.newaxis], order='F'))[0]
        cls_segms[label - 1].append(rle)
    return cls_segms
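# --- Usage sketch (not part of the original code) ---
# A minimal, self-contained illustration of the interpolation trick used in
# get_seg_masks() above: sparse per-point scores are densified into a binary
# mask with scipy.interpolate.griddata. Shapes, thresholds and the (x, y)
# corner layout here are my own demo assumptions, not taken from the source.
import numpy as np
import scipy.interpolate


def dense_mask_from_points(pts_xy, scores, w, h, thr=0.5):
    """Interpolate sparse point scores onto an (h, w) grid and threshold."""
    # Anchor the four box corners with score 0 so the convex hull of the
    # interpolation covers the whole grid.
    corners = np.array([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]],
                       dtype=np.float32)
    pts = np.concatenate([pts_xy, corners], axis=0)
    vals = np.concatenate([scores, np.zeros(4, dtype=np.float32)], axis=0)
    grid_x, grid_y = np.mgrid[0:w:1, 0:h:1]
    dense = scipy.interpolate.griddata(pts, vals, (grid_x, grid_y))
    dense = dense.T  # (w, h) -> (h, w)
    dense = np.nan_to_num(dense, nan=0.0)  # NaN outside the hull -> 0
    return (dense > thr).astype(np.uint8)


# Example: 8 random points with random scores inside a 32x24 box.
rng = np.random.default_rng(0)
demo_pts = rng.uniform([0, 0], [31, 23], size=(8, 2)).astype(np.float32)
demo_scores = rng.uniform(0, 1, size=8).astype(np.float32)
mask = dense_mask_from_points(demo_pts, demo_scores, w=32, h=24)
assert mask.shape == (24, 32)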
def single_gpu_test(model, data_loader, show=False, out_dir=None):
    """Test with single GPU.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool): Whether to show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results_0 = []
    results_1 = []
    results_2 = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)
        if isinstance(result, list):
            results_0.extend(result[0])
            results_1.extend(result[1])
            results_2.extend(result[2])
        else:
            # results.append(result)
            pass

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file_0 = osp.join(out_dir + '_dir',
                                          img_meta['ori_filename'])
                    out_file_1 = osp.join(out_dir + '_sty',
                                          img_meta['ori_filename'])
                    out_file_2 = osp.join(out_dir + '_type',
                                          img_meta['ori_filename'])
                else:
                    out_file_0 = None
                    out_file_1 = None
                    out_file_2 = None

                model.module.show_result(
                    img_show,
                    result[0],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_0)
                model.module.show_result(
                    img_show,
                    result[1],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_1)
                model.module.show_result(
                    img_show,
                    result[2],
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file_2)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return [results_0, results_1, results_2]
def single_gpu_test_rotate_rect_img(model,
                                    data_loader,
                                    show=False,
                                    out_dir=None,
                                    show_score_thr=0.3):
    print('clw: using single_gpu_test_rotate_rect_img() !!')
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        # clw note: for debug
        # for idx, item in enumerate(result[0]):
        #     if item.size == 0:
        #         print('111')
        #     for row in item:
        #         print('boxw:', row[2] - row[0], 'boxh:', row[3] - row[1])
        #         if row[2] - row[0] == 0 or row[3] - row[1] == 0:
        #             print('aaaa')

        # The inverse perspective matrix is serialized into the file name:
        # the last 9 underscore-separated fields form a 3x3 homography.
        img_name = data['img_metas'][0].data[0][0]['ori_filename']
        # origin_name = img_name.split('CAM')[0] + 'CAM' + img_name.split('CAM')[1][0] + '.jpg'
        # data['img_metas'][0].data[0][0]['ori_filename'] = origin_name
        # data['img_metas'][0].data[0][0]['filename'] = data['img_metas'][0].data[0][0]['filename'].rsplit('/', 1)[0] + '/' + origin_name
        matrix_fields = img_name[:-4].split('_')[-9:]
        matrix_vals = [float(a) for a in matrix_fields]
        M_perspective_inv = np.array(matrix_vals).reshape(3, 3)

        for cls_id in range(len(result[0])):
            bboxes_xyxy = result[0][cls_id][:, :4]  # (n, 4)
            if bboxes_xyxy.size == 0:
                continue
            # lift each axis-aligned box to a 4-point contour
            cnts = []
            for xyxy in bboxes_xyxy:
                x1, y1, x2, y2 = xyxy
                cnts.append(np.array(((x1, y1), (x1, y2), (x2, y2),
                                      (x2, y1))))
            cnts = np.array(cnts)

            # map contours back through the inverse perspective transform,
            # then re-axis-align them with boundingRect
            restored_boxes = []
            src_pts = cv2.perspectiveTransform(cnts, M_perspective_inv)
            for cnt in src_pts:
                rect = cv2.boundingRect(cnt)
                x1 = rect[0]
                y1 = rect[1]
                x2 = rect[0] + rect[2]
                y2 = rect[1] + rect[3]
                restored_boxes.append(np.array((x1, y1, x2, y2)))
            restored_boxes = np.array(restored_boxes)
            result[0][cls_id][:, :4] = restored_boxes
            # result[0][cls_id] = np.concatenate((restored_boxes, result[0][cls_id][:, 4]), axis=1)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
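# --- Usage sketch (not part of the original code) ---
# Illustrates the box remapping done in single_gpu_test_rotate_rect_img():
# axis-aligned boxes become 4-point contours, are pushed through an inverse
# perspective matrix with cv2.perspectiveTransform, and are re-axis-aligned
# with cv2.boundingRect. The homography below is made up for the demo.
import cv2
import numpy as np

M_inv = np.array([[1.0, 0.1, 5.0],
                  [0.0, 1.0, 3.0],
                  [0.0, 0.0, 1.0]], dtype=np.float64)

boxes = np.array([[10, 10, 50, 40], [60, 20, 90, 80]], dtype=np.float32)
contours = np.stack([
    np.array([(x1, y1), (x1, y2), (x2, y2), (x2, y1)], dtype=np.float32)
    for x1, y1, x2, y2 in boxes
])  # (n, 4, 2), a layout perspectiveTransform accepts
src_pts = cv2.perspectiveTransform(contours, M_inv)
restored = np.array([cv2.boundingRect(c) for c in src_pts], dtype=np.float32)
# cv2.boundingRect returns (x, y, w, h); convert back to (x1, y1, x2, y2).
restored[:, 2] += restored[:, 0]
restored[:, 3] += restored[:, 1]
print(restored)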
def show_result_ins(img,
                    result,
                    class_names,
                    score_thr=0.3,
                    sort_by_density=False,
                    out_file=None):
    """Visualize the instance segmentation results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The instance segmentation result.
        class_names (list[str] or tuple[str]): A list of class names.
        score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Sort the masks by their density.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If `out_file` is not specified, the visualized
            image is returned, otherwise None is returned.
    """
    assert isinstance(class_names, (tuple, list))
    img = mmcv.imread(img)
    img_show = img.copy()
    h, w, _ = img.shape

    cur_result = result[0]
    seg_label = cur_result[0]
    seg_label = seg_label.cpu().numpy().astype(np.uint8)
    cate_label = cur_result[1]
    cate_label = cate_label.cpu().numpy()
    score = cur_result[2].cpu().numpy()

    vis_inds = score > score_thr
    seg_label = seg_label[vis_inds]
    num_mask = seg_label.shape[0]
    cate_label = cate_label[vis_inds]
    cate_score = score[vis_inds]

    if sort_by_density:
        mask_density = []
        for idx in range(num_mask):
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.int32)
            mask_density.append(cur_mask.sum())
        orders = np.argsort(mask_density)
        seg_label = seg_label[orders]
        cate_label = cate_label[orders]
        cate_score = cate_score[orders]

    np.random.seed(42)
    color_masks = [
        np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_mask)
    ]
    for idx in range(num_mask):
        idx = -(idx + 1)
        cur_mask = seg_label[idx, :, :]
        cur_mask = mmcv.imresize(cur_mask, (w, h))
        cur_mask = (cur_mask > 0.5).astype(np.uint8)
        if cur_mask.sum() == 0:
            continue
        color_mask = color_masks[idx]
        cur_mask_bool = cur_mask.astype(bool)
        img_show[cur_mask_bool] = img[cur_mask_bool] * 0.5 + color_mask * 0.5

        cur_cate = cate_label[idx]
        cur_score = cate_score[idx]
        # label_text = class_names[cur_cate]
        label_text = " "
        center_y, center_x = ndimage.measurements.center_of_mass(cur_mask)
        vis_pos = (max(int(center_x) - 10, 0), int(center_y))
        cv2.putText(img_show, label_text, vis_pos, cv2.FONT_HERSHEY_COMPLEX,
                    0.3, (255, 255, 255))  # white

    if out_file is None:
        return img_show
    else:
        mmcv.imwrite(img_show, out_file)
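# --- Usage sketch (not part of the original code) ---
# show_result_ins() places the class label at the mask's center of mass.
# A minimal demo of that placement on a synthetic mask, assuming only
# numpy/scipy/cv2; the label text and geometry are made up.
import cv2
import numpy as np
from scipy import ndimage

canvas = np.zeros((64, 64, 3), dtype=np.uint8)
demo_mask = np.zeros((64, 64), dtype=np.uint8)
cv2.circle(demo_mask, (40, 24), 10, 1, -1)  # a filled disk as a fake mask
center_y, center_x = ndimage.center_of_mass(demo_mask)
vis_pos = (max(int(center_x) - 10, 0), int(center_y))  # nudge text left
cv2.putText(canvas, 'car', vis_pos, cv2.FONT_HERSHEY_COMPLEX, 0.3,
            (255, 255, 255))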
def __call__(self,
             img_group,
             scale,
             crop_history=None,
             flip=False,
             keep_ratio=True,
             div_255=False,
             is_flow=False):
    if self.resize_crop or self.rescale_crop:
        img_group, crop_quadruple = self.op_crop(img_group)
        img_shape = img_group[0].shape
        scale_factor = None
    else:
        # 1. rescale
        if keep_ratio:
            tuple_list = [
                mmcv.imrescale(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, scale_factors = list(zip(*tuple_list))
            scale_factor = scale_factors[0]
        else:
            tuple_list = [
                mmcv.imresize(img, scale, return_scale=True)
                for img in img_group
            ]
            img_group, w_scales, h_scales = list(zip(*tuple_list))
            scale_factor = np.array(
                [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
                dtype=np.float32)

        if self.pre_mean_volume is not None:
            volume_len = self.pre_mean_volume.shape[0]
            img_group = [
                img - self.pre_mean_volume[i % volume_len, ...]
                for i, img in enumerate(img_group)
            ]

        # 2. crop (if necessary)
        if crop_history is not None:
            self.op_crop = GroupCrop(crop_history)
        if self.op_crop is not None:
            img_group, crop_quadruple = self.op_crop(
                img_group, is_flow=is_flow)
        else:
            crop_quadruple = None
        img_shape = img_group[0].shape

        # 3. flip
        if flip:
            img_group = [mmcv.imflip(img) for img in img_group]
            if is_flow:
                for i in range(0, len(img_group), 2):
                    img_group[i] = mmcv.iminvert(img_group[i])

    # 4a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]

    # 4b. normalize
    img_group = [
        mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        for img in img_group
    ]

    # 5. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape

    if is_flow:
        assert len(img_group[0].shape) == 2
        img_group = [
            np.stack((flow_x, flow_y), axis=2)
            for flow_x, flow_y in zip(img_group[0::2], img_group[1::2])
        ]

    # 6. transpose
    img_group = [img.transpose(2, 0, 1) for img in img_group]

    # stack into numpy.array
    img_group = np.stack(img_group, axis=0)
    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
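# --- Usage sketch (not part of the original code) ---
# The keep_ratio branch above relies on the differing return values of
# mmcv.imrescale and mmcv.imresize: imrescale keeps the aspect ratio and
# returns a single float scale, imresize forces the target size and returns
# separate w/h scales. A quick demonstration on a dummy image:
import mmcv
import numpy as np

img = np.zeros((240, 320, 3), dtype=np.uint8)
rescaled, scale = mmcv.imrescale(img, (256, 256), return_scale=True)
print(rescaled.shape, scale)            # ratio preserved, one scalar scale
resized, w_scale, h_scale = mmcv.imresize(img, (256, 256), return_scale=True)
print(resized.shape, w_scale, h_scale)  # forced to 256x256, two scales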
def read_fn(self):
    """Main function for the read thread.

    Contains three steps:

    1) Read and preprocess (resize + norm) frames from the source.
    2) Create a task from the frames read in step 1 and the buffer.
    3) Put the task into the read queue.
    """
    was_read = True
    start_time = time.time()
    while was_read and not self.stopped:
        # init task
        task = TaskInfo()
        task.clip_vis_length = self.clip_vis_length
        task.frames_inds = self.frames_inds
        task.ratio = self.ratio

        # read buffer
        frames = []
        processed_frames = []
        if len(self.buffer) != 0:
            frames = self.buffer
        if len(self.processed_buffer) != 0:
            processed_frames = self.processed_buffer

        # read and preprocess frames from source and update task
        with self.read_lock:
            before_read = time.time()
            read_frame_cnt = self.window_size - len(frames)
            while was_read and len(frames) < self.window_size:
                was_read, frame = self.cap.read()
                if not self.webcam:
                    # Reading frames too fast may lead to unexpected
                    # performance degradation. If you have enough
                    # resources, this line could be commented out.
                    time.sleep(1 / self.output_fps)
                if was_read:
                    frames.append(mmcv.imresize(frame, self.display_size))
                    processed_frame = mmcv.imresize(
                        frame, self.stdet_input_size).astype(np.float32)
                    _ = mmcv.imnormalize_(processed_frame,
                                          **self.img_norm_cfg)
                    processed_frames.append(processed_frame)
        task.add_frames(self.read_id + 1, frames, processed_frames)

        # update buffer
        if was_read:
            self.buffer = frames[-self.buffer_size:]
            self.processed_buffer = processed_frames[-self.buffer_size:]

        # update read state
        with self.read_id_lock:
            self.read_id += 1
            self.not_end = was_read

        self.read_queue.put((was_read, copy.deepcopy(task)))
        cur_time = time.time()
        logger.debug(
            f'Read thread: {1000*(cur_time - start_time):.0f} ms, '
            f'{read_frame_cnt / (cur_time - before_read):.0f} fps')
        start_time = cur_time
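# --- Usage sketch (not part of the original code) ---
# read_fn() normalizes frames in place with mmcv.imnormalize_, which expects
# a float32 array and mutates it (note the trailing underscore). The config
# values below are the usual ImageNet statistics, assumed here for the demo.
import mmcv
import numpy as np

img_norm_cfg = dict(
    mean=np.array([123.675, 116.28, 103.53], dtype=np.float32),
    std=np.array([58.395, 57.12, 57.375], dtype=np.float32),
    to_rgb=False)
frame = np.random.randint(0, 256, (256, 340, 3)).astype(np.float32)
_ = mmcv.imnormalize_(frame, **img_norm_cfg)  # `frame` is now normalized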
def update_one_model(ori_img, diff_map, temp_fname, best_temp_fname,
                     darknet_model, frcnn_model, flag, start_yolo_num,
                     start_frcnn_num, dest_num, rate, gt_bboxes, update_mask):
    print("Updating %s..." % flag)

    # generate bbox grad mask
    grad_mask = np.zeros((500, 500, 3), dtype=float)
    for bbox in gt_bboxes:
        x1, y1, x2, y2 = bbox
        cv2.rectangle(grad_mask, (x1, y1), (x2, y2), (255, 255, 255), -1)
    grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape(
        (1, 3, 500, 500))

    step = 0
    max_steps_num = 200 if flag == 'frcnn' else 50
    best_yolo_num = start_yolo_num
    best_frcnn_num = start_frcnn_num
    min_yolo_loss = float('inf')
    min_frcnn_loss = float('inf')
    min_criterion = float('inf')
    best_diff_map = None
    gradient = np.zeros((1, 3, 500, 500), dtype=float)
    relu = torch.nn.ReLU()

    while step < max_steps_num:
        save_format_try_image(ori_img, diff_map, temp_fname)
        yolo_input, frcnn_input = get_yolo_image(temp_fname), get_frcnn_image(
            temp_fname)

        # YOLO forward: sum of positive objectness scores as the loss
        yolo_input.requires_grad = True
        list_boxes = darknet_model(yolo_input)
        yolo_results = post_process(list_boxes)
        yolo_num = len(yolo_results)
        boxes_0 = list_boxes[0].view(3, 85, -1)
        loss_0 = torch.sum(relu(boxes_0[:, 4, :]))
        boxes_1 = list_boxes[1].view(3, 85, -1)
        loss_1 = torch.sum(relu(boxes_1[:, 4, :]))
        boxes_2 = list_boxes[2].view(3, 85, -1)
        loss_2 = torch.sum(relu(boxes_2[:, 4, :]))
        yolo_loss = loss_0 + loss_1 + loss_2

        # Faster R-CNN forward
        frcnn_input.requires_grad = True
        frcnn_results, scores, _ = frcnn_model(
            img=[frcnn_input],
            img_metas=[[{
                'filename': '',
                'ori_filename': '',
                'ori_shape': (500, 500, 3),
                'img_shape': (800, 800, 3),
                'pad_shape': (800, 800, 3),
                'scale_factor': np.array([1.6, 1.6, 1.6, 1.6]),
                'flip': False,
                'flip_direction': None,
                'img_norm_cfg': {
                    'mean': np.array([123.675, 116.28, 103.53]),
                    'std': np.array([58.395, 57.12, 57.375]),
                    'to_rgb': True
                }
            }]],
            return_loss=False,
            rescale=False)
        frcnn_results = np.concatenate(frcnn_results)
        frcnn_loss = torch.sum(relu(scores[:, :-1] - 0.049))
        frcnn_num = np.sum(frcnn_results[:, 4] > 0.3)

        # # get gt bboxes
        # gt_bboxes = []
        # h = w = 500
        # for yolo_bbox in yolo_results:
        #     x1, y1, x2, y2 = yolo_bbox[:4]
        #     x1, x2 = int(x1*w), int(x2*w)
        #     y1, y2 = int(y1*h), int(y2*h)
        #     gt_bboxes.append([x1-x2//2, y1-y2//2, x1+x2//2, y1+y2//2])
        # for frcnn_bbox in frcnn_results:
        #     if frcnn_bbox[-1] > 0.3:
        #         x1, y1, x2, y2 = [int(x/1.6) for x in frcnn_bbox[:4]]
        #         gt_bboxes.append([x1, y1, x2, y2])
        # # generate bbox grad mask
        # grad_mask = np.zeros((500, 500, 3), dtype=float)
        # for bbox in gt_bboxes:
        #     x1, y1, x2, y2 = bbox
        #     cv2.rectangle(grad_mask, (x1, y1), (x2, y2), (255, 255, 255), -1)
        # grad_mask = np.swapaxes(np.swapaxes(grad_mask, 1, 2), 0, 1).reshape((1, 3, 500, 500))

        if step == 0:
            epoch_criterion = float(yolo_num) / start_yolo_num + float(
                frcnn_num) / start_frcnn_num

        # criterion = yolo_num if flag == 'yolo' else frcnn_num
        criterion = 10000 * (min(1., float(yolo_num) / start_yolo_num) +
                             min(1., float(frcnn_num) / start_frcnn_num)) + (
                                 yolo_loss if flag == 'yolo' else frcnn_loss)
        if criterion < min_criterion:
            min_criterion = criterion
            min_frcnn_loss = frcnn_loss
            min_yolo_loss = yolo_loss
            best_yolo_num = yolo_num
            best_frcnn_num = frcnn_num
            best_diff_map = diff_map.copy()
            copyfile(temp_fname, best_temp_fname)

        # check rate (`fname` is a module-level global here)
        patch_number, area_rate = get_cd_score(fname, best_temp_fname)
        print(
            "%d @ [%d,%d, %d,%d --> %d] f_loss=%g y_loss=%g min_f_loss=%g "
            "min_y_loss=%g, best patch=%d rate=%g limit=%.2f" %
            (step, yolo_num, frcnn_num, best_yolo_num, best_frcnn_num,
             dest_num, frcnn_loss, yolo_loss, min_frcnn_loss, min_yolo_loss,
             patch_number, area_rate, 100. - rate))
        if (((yolo_num == 0 and flag == 'yolo') or
             (frcnn_num == 0 and flag == 'frcnn')) and area_rate < 0.02
                and patch_number <= 10):
            break

        # backprop through YOLO and resize the gradient to the 500x500 canvas
        darknet_model.zero_grad()
        yolo_loss.backward(retain_graph=False)
        yolo_d_grad = yolo_input.grad.data.cpu().numpy().reshape(
            (1, 3, 608, 608))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad[0], 0, 1), 1, 2)
        yolo_d_grad = mmcv.imresize(yolo_d_grad, (500, 500))
        yolo_d_grad = np.swapaxes(np.swapaxes(yolo_d_grad, 1, 2),
                                  0, 1).reshape((1, 3, 500, 500))
        # yolo_d_grad = blur(yolo_d_grad)

        # backprop through Faster R-CNN, undoing the normalization scaling
        frcnn_model.zero_grad()
        frcnn_loss.backward(retain_graph=False)
        frcnn_d_grad = frcnn_input.grad.data.cpu().numpy().reshape(
            (1, 3, 800, 800))
        frcnn_d_grad[:, 0, :, :] = frcnn_d_grad[:, 0, :, :] * (58.395 / 255.)
        frcnn_d_grad[:, 1, :, :] = frcnn_d_grad[:, 1, :, :] * (57.12 / 255.)
        frcnn_d_grad[:, 2, :, :] = frcnn_d_grad[:, 2, :, :] * (57.375 / 255.)
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad[0], 0, 1), 1, 2)
        frcnn_d_grad = mmcv.imresize(frcnn_d_grad, (500, 500))
        frcnn_d_grad = np.swapaxes(np.swapaxes(frcnn_d_grad, 1, 2),
                                   0, 1).reshape((1, 3, 500, 500))
        # frcnn_d_norm = np.linalg.norm(frcnn_d_grad, ord=2, axis=1).reshape(500, 500)
        # frcnn_d_norm = (frcnn_d_norm - np.min(frcnn_d_norm)) / (np.max(frcnn_d_norm) - np.min(frcnn_d_norm))
        # frcnn_weight = np.repeat(frcnn_d_norm.reshape(1, 1, 500, 500), 3, axis=1)
        # frcnn_d_grad = np.multiply(frcnn_weight, frcnn_d_grad)
        frcnn_d_grad = normalize(frcnn_d_grad.reshape(3, -1),
                                 axis=1).reshape((1, 3, 500, 500))
        frcnn_d_grad = frcnn_d_grad * 10
        # frcnn_d_grad = blur(frcnn_d_grad)

        # blend the two gradients
        if flag == 'yolo':
            alpha = 0.95
        else:
            alpha = 0.8
        gradient = (1. - alpha) * frcnn_d_grad + alpha * yolo_d_grad
        # if flag == 'frcnn':
        #     gradient = 0.9 * gradient + 0.1 * grad
        # else:
        #     gradient = grad

        # step size schedule: decays linearly with step
        loss = yolo_loss if flag == 'yolo' else frcnn_loss
        if loss > 10:
            step_size = 2  # 0.1 + 0.3*(float(loss)-10.)/(start_loss-10.)
        elif loss > 5:
            step_size = 2
        else:
            step_size = 0.2
        step_size = step_size * (1. - float(step) / max_steps_num)
        gradient = step_size * gradient

        # blur
        # gradient[0, 0, :, :] = gaussian_filter(gradient[0, 0, :, :], sigma=3)
        # gradient[0, 1, :, :] = gaussian_filter(gradient[0, 1, :, :], sigma=3)
        # gradient[0, 2, :, :] = gaussian_filter(gradient[0, 2, :, :], sigma=3)

        # restrict the update to the allowed region, then descend
        gradient *= update_mask.astype(float)
        diff_map -= gradient

        # # check area rate
        # diff_map[grad_mask == 0] = 0
        # diff_map_change = np.sum(np.abs(diff_map), axis=1)
        # high_thresh = np.percentile(diff_map_change, rate)
        # gray_mask = ((diff_map_change > high_thresh) * 255.).astype(np.uint8)
        # gray_mask = gray_mask.reshape(500, 500)
        # diff_map[0, 0, :, :][gray_mask == 0] = 0
        # diff_map[0, 1, :, :][gray_mask == 0] = 0
        # diff_map[0, 2, :, :][gray_mask == 0] = 0

        # check connected parts' number: keep only the 10 largest components
        save_format_try_image(ori_img, diff_map, temp_fname)
        cd_map = get_cd_map(fname, temp_fname)
        labels = measure.label(cd_map, background=0, connectivity=2)
        label_num = np.max(labels)
        if label_num > 10:
            areas = [np.sum(labels == i) for i in range(1, label_num + 1)]
            label_ids = list(range(1, label_num + 1))
            areas, label_ids = zip(*sorted(zip(areas, label_ids)))
            for i in label_ids[:-10]:
                # gray_mask[labels == i] = 0
                diff_map[0, 0, :, :][labels == i] = 0
                diff_map[0, 1, :, :][labels == i] = 0
                diff_map[0, 2, :, :][labels == i] = 0

        # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        # gray_mask = cv2.morphologyEx(gray_mask, cv2.MORPH_CLOSE, kernel)
        # gray_mask = gray_mask.reshape(500, 500)
        # diff_map[0, 0, :, :][gray_mask == 0] = 0
        # diff_map[0, 1, :, :][gray_mask == 0] = 0
        # diff_map[0, 2, :, :][gray_mask == 0] = 0

        # see = check_image(diff_map)
        # cv2.imwrite('check/%03d_region.jpg' % step, see)
        # cv2.imwrite('check/%03d_region_filter.jpg' % step, cv2.medianBlur(see, 3))
        step += 1

    return (float(best_yolo_num) / start_yolo_num +
            float(best_frcnn_num) / start_frcnn_num >= epoch_criterion,
            best_diff_map)
def sample_dist(gt_bboxes, gt_masks, cfg, num_pts):
    sample_dist_p = cfg.get('sample_dist_p', 1.5)
    pts_list = []
    pts_label_list = []
    # _len = int(np.sqrt(num_pts))
    # assert _len**2 == num_pts
    for i in range(len(gt_bboxes)):
        x1, y1, x2, y2 = gt_bboxes[i].cpu().numpy().astype(np.int32)
        if cfg.get('resize_sample', True):
            w = np.maximum(x2 - x1 + 1, 1)
            h = np.maximum(y2 - y1 + 1, 1)
            mask = mmcv.imresize(gt_masks[i][y1:y1 + h, x1:x1 + w],
                                 (cfg.mask_size, cfg.mask_size))
            polygons = mask_to_poly(mask)
            # rasterize the contour: contour pixels become 0
            distance_map = np.ones(mask.shape).astype(np.uint8)
            for poly in polygons:
                poly = np.array(poly).astype(int)
                for j in range(len(poly) // 2):
                    x_0, y_0 = poly[2 * j:2 * j + 2]
                    if j == len(poly) // 2 - 1:
                        x_1, y_1 = poly[0:2]
                    else:
                        x_1, y_1 = poly[2 * j + 2:2 * j + 4]
                    cv2.line(distance_map, (x_0, y_0), (x_1, y_1), (0),
                             thickness=2)
            roi_dist_map = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
            con_index = np.stack(np.nonzero(roi_dist_map == 0)[::-1], axis=-1)
            roi_dist_map[roi_dist_map == 0] = 1
            # pixels close to the contour get higher sampling probability
            prob_dist_map = 1 / roi_dist_map
            prob_dist_map = np.power(prob_dist_map, sample_dist_p)
            prob_dist_map = prob_dist_map / prob_dist_map.sum()
            index_y, index_x = np.nonzero(prob_dist_map > 0)
            index = np.stack([index_x, index_y], axis=-1)
            _len = index.shape[0]
            if len(con_index) == 0:
                pts = np.zeros([2 * num_pts])
            else:
                repeat = num_pts // _len
                mod = num_pts % _len
                perm = np.random.choice(
                    _len, mod, replace=False, p=prob_dist_map.reshape(-1))
                draw = [index.copy() for i in range(repeat)]
                draw.append(index[perm])
                draw = np.concatenate(draw, 0)
                # draw[:num_extreme] = extremes[:num_extreme]
                draw = draw + np.random.rand(*draw.shape)
                # map the mask-grid points back to image coordinates
                x_scale = float(w) / cfg.mask_size
                y_scale = float(h) / cfg.mask_size
                draw[:, 0] = draw[:, 0] * x_scale + x1
                draw[:, 1] = draw[:, 1] * y_scale + y1
                pts = draw.reshape(2 * num_pts)
        else:
            polygons = mask_to_poly(gt_masks[i])
            distance_map = np.ones(gt_masks[i].shape).astype(np.uint8)
            for poly in polygons:
                poly = np.array(poly).astype(int)
                for j in range(len(poly) // 2):
                    x_0, y_0 = poly[2 * j:2 * j + 2]
                    if j == len(poly) // 2 - 1:
                        x_1, y_1 = poly[0:2]
                    else:
                        x_1, y_1 = poly[2 * j + 2:2 * j + 4]
                    cv2.line(distance_map, (x_0, y_0), (x_1, y_1), (0),
                             thickness=2)
            dist = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
            roi_dist_map = dist[y1:y2, x1:x2]
            con_index = np.stack(np.nonzero(roi_dist_map == 0)[::-1], axis=-1)
            roi_dist_map[roi_dist_map == 0] = 1
            prob_dist_map = 1 / roi_dist_map
            prob_dist_map = np.power(prob_dist_map, sample_dist_p)
            prob_dist_map = prob_dist_map / prob_dist_map.sum()
            index_y, index_x = np.nonzero(prob_dist_map > 0)
            index = np.stack([index_x, index_y], axis=-1)
            _len = index.shape[0]
            if len(con_index) == 0:
                pts = np.zeros([2 * num_pts])
            else:
                repeat = num_pts // _len
                mod = num_pts % _len
                perm = np.random.choice(
                    _len, mod, replace=False, p=prob_dist_map.reshape(-1))
                draw = [index.copy() for i in range(repeat)]
                draw.append(index[perm])
                draw = np.concatenate(draw, 0)
                draw[:, 0] = draw[:, 0] + x1
                draw[:, 1] = draw[:, 1] + y1
                pts = draw.reshape(2 * num_pts)
        pts_list.append(pts)
        # label each sampled point by the mask value under it
        pts_long = pts.astype(np.int64)
        pts_label = gt_masks[i][pts_long[1::2], pts_long[0::2]]
        pts_label_list.append(pts_label)
    pts_list = np.stack(pts_list, 0)
    pts_label_list = np.stack(pts_label_list, 0)
    return pts_list, pts_label_list
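# --- Usage sketch (not part of the original code) ---
# The core of sample_dist(): pixels near the object contour get a higher
# sampling probability via an inverse distance transform raised to
# sample_dist_p. A self-contained demo on a synthetic square mask; the mask
# geometry and the 16-point draw are made up for illustration.
import cv2
import numpy as np

mask = np.zeros((28, 28), dtype=np.uint8)
cv2.rectangle(mask, (6, 6), (21, 21), 1, -1)

distance_map = np.ones_like(mask)
cv2.rectangle(distance_map, (6, 6), (21, 21), 0, thickness=2)  # contour -> 0
dist = cv2.distanceTransform(distance_map, cv2.DIST_L2, 3)
dist[dist == 0] = 1
prob = (1.0 / dist) ** 1.5           # sample_dist_p = 1.5
prob = prob / prob.sum()

# np.nonzero walks the grid in the same row-major order as reshape(-1),
# so index[k] lines up with prob.flat[k].
index_y, index_x = np.nonzero(prob > 0)
index = np.stack([index_x, index_y], axis=-1)
picks = np.random.choice(len(index), 16, replace=False, p=prob.reshape(-1))
pts = index[picks] + np.random.rand(16, 2)  # jitter inside each pixel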
def vis_seg(img, result, score_thr, save_dir):
    class_names = [
        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
        'bicycle'
    ]
    print(class_names)
    imgs = [img]
    if result[0]:
        for img, cur_result in zip(imgs, result):
            h, w, _ = img.shape
            img_show = img[:h, :w, :]

            seg_label = cur_result[0]
            seg_label = seg_label.cpu().numpy().astype(np.uint8)
            cate_label = cur_result[1]
            cate_label = cate_label.cpu().numpy()
            score = cur_result[2].cpu().numpy()

            vis_inds = score > score_thr
            seg_label = seg_label[vis_inds]
            num_mask = seg_label.shape[0]
            cate_label = cate_label[vis_inds]
            cate_score = score[vis_inds]

            # sort masks by density so small masks are drawn on top
            mask_density = []
            for idx in range(num_mask):
                cur_mask = seg_label[idx, :, :]
                cur_mask = mmcv.imresize(cur_mask, (w, h))
                cur_mask = (cur_mask > 0.5).astype(np.int32)
                mask_density.append(cur_mask.sum())
            orders = np.argsort(mask_density)
            seg_label = seg_label[orders]
            cate_label = cate_label[orders]
            cate_score = cate_score[orders]

            seg_show = img_show.copy()
            for idx in range(num_mask):
                idx = -(idx + 1)
                cur_mask = seg_label[idx, :, :]
                cur_mask = mmcv.imresize(cur_mask, (w, h))
                cur_mask = (cur_mask > 0.5).astype(np.uint8)
                if cur_mask.sum() == 0:
                    continue
                color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
                cur_mask_bool = cur_mask.astype(bool)
                contours, _ = cv2.findContours(cur_mask * 255, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
                seg_show[cur_mask_bool] = img_show[
                    cur_mask_bool] * 0.4 + color_mask * 0.6
                color_mask = color_mask[0].tolist()
                cv2.drawContours(seg_show, contours, -1, tuple(color_mask), 1,
                                 lineType=cv2.LINE_AA)

                cur_cate = cate_label[idx]
                cur_score = cate_score[idx]
                label_text = class_names[cur_cate]
                center_y, center_x = ndimage.measurements.center_of_mass(
                    cur_mask)
                vis_pos = (max(int(center_x) - 10, 0), int(center_y))
                cv2.putText(seg_show, label_text, vis_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255),
                            lineType=cv2.LINE_AA)
                cv2.putText(seg_show, '{:.1f}%'.format(cur_score * 100),
                            (vis_pos[0], vis_pos[1] + 9),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.25, (255, 255, 255),
                            lineType=cv2.LINE_AA)
            mmcv.imshow(seg_show)
    else:
        print('no detections')
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    """Test model with single gpu.

    This method tests model with single gpu and gives the 'show' option.
    By setting ``show=True``, it saves the visualization results under
    ``out_dir``.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool): Whether to save visualization results. Default: False.
        out_dir (str): The path to save visualization results. Default: None.

    Returns:
        list[dict]: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show:
            # Visualize the results of the MMDetection3D model;
            # 'show_results' is the MMDetection3D visualization API.
            models_3d = (Base3DDetector, Base3DSegmentor,
                         SingleStageMono3DDetector)
            if isinstance(model.module, models_3d):
                model.module.show_results(data, result, out_dir=out_dir)
            # Visualize the results of the MMDetection model;
            # 'show_result' is the MMDetection visualization API.
            else:
                batch_size = len(result)
                if batch_size == 1 and isinstance(data['img'][0],
                                                  torch.Tensor):
                    img_tensor = data['img'][0]
                else:
                    img_tensor = data['img'][0].data[0]
                img_metas = data['img_metas'][0].data[0]
                imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
                assert len(imgs) == len(img_metas)

                for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                    h, w, _ = img_meta['img_shape']
                    img_show = img[:h, :w, :]

                    ori_h, ori_w = img_meta['ori_shape'][:-1]
                    img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                    if out_dir:
                        out_file = osp.join(out_dir, img_meta['ori_filename'])
                    else:
                        out_file = None

                    model.module.show_result(
                        img_show,
                        result[j],
                        show=show,
                        out_file=out_file,
                        score_thr=show_score_thr)
        results.extend(result)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def draw_assign_results(self, img_meta, sampling_result):
    if self.draw_assign_results_path == '':
        return
    img = imread(img_meta['ori_filename'])
    if img is None:
        return
    # local imports, kept inside the method as in the original
    import cv2
    import numpy as np

    # resize with crop: scale so the image covers the target shape,
    # then center-crop to it
    img_h, img_w, _ = img.shape
    dst_h, dst_w, _ = img_meta['img_shape']
    img_name = img_meta['ori_filename'].strip('\n').split('/')[-1]
    w_scale = dst_w / img_w
    h_scale = dst_h / img_h
    scale = max(w_scale, h_scale)
    img = imresize(img, (int(img_w * scale), int(img_h * scale)))

    # crop
    now_w = int(img_w * scale)
    now_h = int(img_h * scale)
    box = [0, 0, now_w, now_h]
    if now_w > dst_w:
        box = [(now_w - dst_w) // 2, 0, now_w - (now_w - dst_w) // 2, now_h]
        diff = box[2] - box[0] + 1 - dst_w
        if diff != 0:
            box[2] -= diff
    elif now_h > dst_h:
        box = [0, (now_h - dst_h) // 2, now_w, now_h - (now_h - dst_h) // 2]
        diff = box[3] - box[1] + 1 - dst_h
        if diff != 0:
            box[3] -= diff
    img = imcrop(img, np.array(box))

    pos_anchor = sampling_result.pos_bboxes
    pos_anchor_label = sampling_result.pos_gt_labels
    color_dict = {
        0: (0, 255, 0),
        1: (255, 153, 18),
        2: (160, 82, 45),
        3: (255, 0, 0),
        4: (3, 168, 158),
        5: (0, 255, 255),
        6: (138, 43, 226),
        7: (64, 224, 205),
        8: (122, 123, 124)
    }
    img = np.ascontiguousarray(img)
    for i in range(len(pos_anchor)):
        anchor = pos_anchor[i, :]
        x_min = max(0, int(anchor[0]))
        y_min = max(0, int(anchor[1]))
        x_max = max(0, int(anchor[2]))
        y_max = max(0, int(anchor[3]))
        label = pos_anchor_label[i].cpu().item()
        if label in self.draw_label:
            cv2.rectangle(
                img, (x_min, y_min), (x_max, y_max),
                color=color_dict[label],
                thickness=1)
    imwrite(img, self.draw_assign_results_path + img_name)
def extract_frame(vid_item):
    """Generate optical flow using dense flow.

    Args:
        vid_item (list): Video item containing video full path,
            video (short) path, video id.

    Returns:
        bool: Whether generate optical flow successfully.
    """
    full_path, vid_path, vid_id, method, task, report_file = vid_item
    if '/' in vid_path:
        act_name = osp.basename(osp.dirname(vid_path))
        out_full_path = osp.join(args.out_dir, act_name)
    else:
        out_full_path = args.out_dir

    run_success = -1

    if task == 'rgb':
        if args.use_opencv:
            # Unlike denseflow, OpenCV will not create a sub directory
            # with the video name, so do it manually.
            try:
                video_name = osp.splitext(osp.basename(vid_path))[0]
                out_full_path = osp.join(out_full_path, video_name)

                vr = mmcv.VideoReader(full_path)
                for i, vr_frame in enumerate(vr):
                    if vr_frame is not None:
                        h, w, _ = np.shape(vr_frame)  # frames are (H, W, C)
                        if args.new_short == 0:
                            if args.new_width == 0 or args.new_height == 0:
                                # Keep original shape
                                out_img = vr_frame
                            else:
                                out_img = mmcv.imresize(
                                    vr_frame,
                                    (args.new_width, args.new_height))
                        else:
                            # resize the short side to new_short
                            if min(h, w) == w:
                                new_w = args.new_short
                                new_h = int((new_w / w) * h)
                            else:
                                new_h = args.new_short
                                new_w = int((new_h / h) * w)
                            # mmcv.imresize takes the target size as (w, h)
                            out_img = mmcv.imresize(vr_frame, (new_w, new_h))
                        mmcv.imwrite(out_img,
                                     f'{out_full_path}/img_{i + 1:05d}.jpg')
                    else:
                        warnings.warn(
                            'Length inconsistent! '
                            f'Early stop with {i + 1} out of {len(vr)} '
                            'frames.')
                        break
                run_success = 0
            except Exception:
                run_success = -1
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -ns={args.new_short} -v')
            run_success = os.system(cmd)
    elif task == 'flow':
        if args.input_frames:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -nw={args.new_width} -nh={args.new_height} -v --if')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -ns={args.new_short} -v --if')
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                    f' -ns={args.new_short} -v')
        run_success = os.system(cmd)
    else:
        if args.new_short == 0:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -nw={args.new_width} -nh={args.new_height} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -ns={args.new_short} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        run_success_rgb = os.system(cmd_rgb)
        run_success_flow = os.system(cmd_flow)
        if run_success_flow == 0 and run_success_rgb == 0:
            run_success = 0

    if run_success == 0:
        print(f'{task} {vid_id} {vid_path} {method} done')
        sys.stdout.flush()
        lock.acquire()
        with open(report_file, 'a') as f:
            line = full_path + '\n'
            f.write(line)
        lock.release()
    else:
        print(f'{task} {vid_id} {vid_path} {method} got something wrong')
        sys.stdout.flush()

    return True
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5,
                    pre_eval=False,
                    format_only=False,
                    format_args={}):
    """Test with single GPU by progressive mode.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.
        efficient_test (bool): Whether save the results as local numpy files
            to save CPU memory during evaluation. Mutually exclusive with
            pre_eval and format_results. Default: False.
        opacity(float): Opacity of painted segmentation map.
            Default 0.5. Must be in (0, 1] range.
        pre_eval (bool): Use dataset.pre_eval() function to generate
            pre_results for metric evaluation. Mutually exclusive with
            efficient_test and format_results. Default: False.
        format_only (bool): Only format result for results commit.
            Mutually exclusive with pre_eval and efficient_test.
            Default: False.
        format_args (dict): The args for format_results. Default: {}.

    Returns:
        list: list of evaluation pre-results or list of save file names.
    """
    if efficient_test:
        warnings.warn(
            'DeprecationWarning: ``efficient_test`` will be deprecated, the '
            'evaluation is CPU memory friendly with pre_eval=True')
        mmcv.mkdir_or_exist('.efficient_test')
    # when none of them is set true, return segmentation results as
    # a list of np.array.
    assert [efficient_test, pre_eval, format_only].count(True) <= 1, \
        '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \
        'exclusive, only one of them could be true.'

    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    # The pipeline about how the data_loader retrieves samples from dataset:
    # sampler -> batch_sampler -> indices
    # The indices are passed to dataset_fetcher to get data from dataset.
    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
    # we use batch_sampler to get correct data idx
    loader_indices = data_loader.batch_sampler

    for batch_indices, data in zip(loader_indices, data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if efficient_test:
            result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]

        if format_only:
            result = dataset.format_results(
                result, indices=batch_indices, **format_args)
        if pre_eval:
            # TODO: adapt samples_per_gpu > 1.
            # only samples_per_gpu=1 valid now
            result = dataset.pre_eval(result, indices=batch_indices)

        results.extend(result)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file,
                    opacity=opacity)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def __call__(self, results):
    rank, _ = get_dist_info()
    if isinstance(self.height, int):
        dst_height = self.height
        dst_min_width = self.min_width
        dst_max_width = self.max_width
    else:
        # Multi-scale resize used in distributed training.
        # Choose one (height, width) pair for one rank id.
        idx = rank % len(self.height)
        dst_height = self.height[idx]
        dst_min_width = self.min_width[idx]
        dst_max_width = self.max_width[idx]

    img_shape = results['img_shape']
    ori_height, ori_width = img_shape[:2]
    valid_ratio = 1.0
    resize_shape = list(img_shape)
    pad_shape = list(img_shape)

    if self.keep_aspect_ratio:
        new_width = math.ceil(float(dst_height) / ori_height * ori_width)
        width_divisor = int(1 / self.width_downsample_ratio)
        # make sure new_width is an integral multiple of width_divisor.
        if new_width % width_divisor != 0:
            new_width = round(new_width / width_divisor) * width_divisor
        if dst_min_width is not None:
            new_width = max(dst_min_width, new_width)
        if dst_max_width is not None:
            valid_ratio = min(1.0, 1.0 * new_width / dst_max_width)
            resize_width = min(dst_max_width, new_width)
            img_resize = mmcv.imresize(
                results['img'], (resize_width, dst_height),
                backend=self.backend)
            resize_shape = img_resize.shape
            pad_shape = img_resize.shape
            if new_width < dst_max_width:
                img_resize = mmcv.impad(
                    img_resize,
                    shape=(dst_height, dst_max_width),
                    pad_val=self.img_pad_value)
                pad_shape = img_resize.shape
        else:
            img_resize = mmcv.imresize(
                results['img'], (new_width, dst_height),
                backend=self.backend)
            resize_shape = img_resize.shape
            pad_shape = img_resize.shape
    else:
        img_resize = mmcv.imresize(
            results['img'], (dst_max_width, dst_height),
            backend=self.backend)
        resize_shape = img_resize.shape
        pad_shape = img_resize.shape

    results['img'] = img_resize
    results['img_shape'] = resize_shape
    results['resize_shape'] = resize_shape
    results['pad_shape'] = pad_shape
    results['valid_ratio'] = valid_ratio
    return results
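# --- Usage sketch (not part of the original code) ---
# The keep_aspect_ratio branch above in a nutshell: resize to a fixed height,
# clamp the width to [min_width, max_width], pad up to max_width, and record
# which fraction of the padded width is valid. The numbers below are
# hypothetical demo values.
import math

import mmcv
import numpy as np

img = np.zeros((64, 100, 3), dtype=np.uint8)   # (h, w, c)
dst_height, dst_min_width, dst_max_width = 32, 32, 100

new_width = math.ceil(dst_height / img.shape[0] * img.shape[1])  # -> 50
new_width = max(dst_min_width, new_width)
valid_ratio = min(1.0, new_width / dst_max_width)                # -> 0.5
resized = mmcv.imresize(img, (min(dst_max_width, new_width), dst_height))
padded = mmcv.impad(resized, shape=(dst_height, dst_max_width), pad_val=0)
assert padded.shape == (32, 100, 3) and valid_ratio == 0.5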
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    fps=3,
                    show_score_thr=0.3):
    """Test model with single gpu.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool, optional): If True, visualize the prediction results.
            Defaults to False.
        out_dir (str, optional): Path of directory to save the visualization
            results. Defaults to None.
        fps (int, optional): FPS of the output video. Defaults to 3.
        show_score_thr (float, optional): The score threshold of
            visualization (Only used in VID for now). Defaults to 0.3.

    Returns:
        dict[str, list]: The prediction results.
    """
    model.eval()
    results = defaultdict(list)
    dataset = data_loader.dataset
    prev_img_meta = None
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        batch_size = data['img'][0].size(0)
        if show or out_dir:
            assert batch_size == 1, 'Only support batch_size=1 when testing.'
            img_tensor = data['img'][0]
            img_meta = data['img_metas'][0].data[0][0]
            img = tensor2imgs(img_tensor, **img_meta['img_norm_cfg'])[0]

            h, w, _ = img_meta['img_shape']
            img_show = img[:h, :w, :]

            ori_h, ori_w = img_meta['ori_shape'][:-1]
            img_show = mmcv.imresize(img_show, (ori_w, ori_h))

            if out_dir:
                out_file = osp.join(out_dir, img_meta['ori_filename'])
            else:
                out_file = None

            model.module.show_result(
                img_show,
                result,
                show=show,
                out_file=out_file,
                score_thr=show_score_thr)

            # Whether a video needs to be generated from images.
            # frame_id == 0 means the model starts processing a new video,
            # therefore we can write the previous video.
            # There are two corner cases.
            # Case 1: prev_img_meta == None means there is no previous video.
            # Case 2: i == len(dataset) - 1 means processing the last video.
            need_write_video = (
                prev_img_meta is not None and img_meta['frame_id'] == 0
                or i == len(dataset) - 1)
            if out_dir and need_write_video:
                prev_img_prefix, prev_img_name = prev_img_meta[
                    'ori_filename'].rsplit('/', 1)
                prev_img_idx, prev_img_type = prev_img_name.split('.')
                prev_filename_tmpl = '{:0' + str(
                    len(prev_img_idx)) + 'd}.' + prev_img_type
                prev_img_dirs = f'{out_dir}/{prev_img_prefix}'
                prev_img_names = sorted(os.listdir(prev_img_dirs))
                prev_start_frame_id = int(prev_img_names[0].split('.')[0])
                prev_end_frame_id = int(prev_img_names[-1].split('.')[0])

                mmcv.frames2video(
                    prev_img_dirs,
                    f'{prev_img_dirs}/out_video.mp4',
                    fps=fps,
                    fourcc='mp4v',
                    filename_tmpl=prev_filename_tmpl,
                    start=prev_start_frame_id,
                    end=prev_end_frame_id,
                    show_progress=False)

            prev_img_meta = img_meta

        for key in result:
            if 'mask' in key:
                result[key] = encode_mask_results(result[key])

        for k, v in result.items():
            results[k].append(v)

        for _ in range(batch_size):
            prog_bar.update()

    return results
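# --- Usage sketch (not part of the original code) ---
# The video-writing step above boils down to a single mmcv.frames2video call.
# A hedged, self-contained example with hypothetical paths and frame
# numbering (the directory must already contain the numbered frames):
import mmcv

frame_dir = 'out/MOT17-02'            # e.g. 000000.jpg, 000001.jpg, ...
mmcv.frames2video(
    frame_dir,
    f'{frame_dir}/out_video.mp4',
    fps=3,
    fourcc='mp4v',
    filename_tmpl='{:06d}.jpg',       # must match the on-disk frame names
    start=0,
    end=10,
    show_progress=False)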
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result, tuple) and len(result) == 2:
            # Mask R-CNN
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        elif isinstance(result, tuple) and len(result) == 3:
            # Mask R-CNN + Offset
            bbox_results, mask_results, offset_results = result
            if mask_results is not None:
                encoded_mask_results = encode_mask_results(mask_results)
                result = bbox_results, encoded_mask_results, offset_results
            else:
                # only pred offset
                result = bbox_results, offset_results
        elif isinstance(result, tuple) and len(result) == 4:
            # Mask R-CNN + Offset + Height
            bbox_results, mask_results, offset_results, height_results = \
                result
            encoded_mask_results = encode_mask_results(mask_results)
            result = (bbox_results, encoded_mask_results, offset_results,
                      height_results)
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5):
    """Test with single GPU.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether show results during inference. Default: False.
        out_dir (str, optional): If specified, the results will be dumped
            into the directory to save output results.
        efficient_test (bool): Whether save the results as local numpy files
            to save CPU memory during evaluation. Default: False.
        opacity(float): Opacity of painted segmentation map.
            Default 0.5. Must be in (0, 1] range.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    palette=dataset.PALETTE,
                    show=show,
                    out_file=out_file,
                    opacity=opacity)

        if isinstance(result, list):
            if efficient_test:
                result = [np2tmp(_) for _ in result]
            results.extend(result)
        else:
            if efficient_test:
                result = np2tmp(result)
            results.append(result)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    bbox_head=None,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(
                return_loss=False,
                rescale=True,
                show=show,
                out_dir=out_dir,
                **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        if bbox_head.type == 'LSHead':
            if bbox_head.task == 'bbox':
                extremes = result.pop(-1)
                result = result[0]
            elif bbox_head.task == 'segm':
                bbox_results, poly_results = result
                img_metas = data['img_metas'][0].data[0]
                ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                encoded_poly_results = encode_poly_results(
                    poly_results, ori_h, ori_w)
                result = bbox_results, encoded_poly_results
        elif isinstance(result, tuple):
            # encode mask results
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    PALETTE = getattr(dataset, 'PALETTE', None)
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j],
                    bbox_color=PALETTE,
                    text_color=PALETTE,
                    mask_color=PALETTE,
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        # This logic is only used in panoptic segmentation test.
        elif isinstance(result[0], dict) and 'ins_results' in result[0]:
            for j in range(len(result)):
                bbox_results, mask_results = result[j]['ins_results']
                result[j]['ins_results'] = (
                    bbox_results, encode_mask_results(mask_results))

        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
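# --- Usage sketch (not part of the original code) ---
# encode_mask_results() ultimately RLE-encodes binary masks the way COCO
# does. The equivalent pycocotools call, shown on a dummy mask:
import numpy as np
import pycocotools.mask as maskUtils

im_mask = np.zeros((100, 120), dtype=np.uint8)
im_mask[20:60, 30:90] = 1
# encode() wants a Fortran-ordered (H, W, N) uint8 array; take the first
# (and only) RLE from the returned list.
rle = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
decoded = maskUtils.decode(rle)
assert (decoded == im_mask).all()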
def show_result_ins(img,
                    result,
                    score_thr=0.2,
                    sort_by_density=False,
                    out_file=None):
    """Visualize the instance segmentation results on the image.

    Args:
        img (str or np.ndarray): Image filename or loaded image.
        result (tuple[list] or list): The instance segmentation result.
        score_thr (float): The threshold to visualize the masks.
        sort_by_density (bool): Sort the masks by their density.
        out_file (str, optional): If specified, the visualization result will
            be written to the out file instead of shown in a window.

    Returns:
        np.ndarray or None: If `out_file` is not specified, the color mask
            image is returned, otherwise None is returned.
    """
    img = mmcv.imread(img)
    img_show = img.copy()
    h, w, _ = img.shape
    mask = np.zeros_like(img_show)
    if not result or result == [None]:
        return mask
    cur_result = result[0]
    seg_label = cur_result[0]
    seg_label = seg_label.cpu().numpy().astype(np.uint8)
    cate_label = cur_result[1]
    cate_label = cate_label.cpu().numpy()
    score = cur_result[2].cpu().numpy()

    vis_inds = score > score_thr
    seg_label = seg_label[vis_inds]
    num_mask = seg_label.shape[0]
    cate_label = cate_label[vis_inds]
    cate_score = score[vis_inds]

    if sort_by_density:
        mask_density = []
        for idx in range(num_mask):
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.int32)
            mask_density.append(cur_mask.sum())
        orders = np.argsort(mask_density)
        seg_label = seg_label[orders]
        cate_label = cate_label[orders]
        cate_score = cate_score[orders]

    np.random.seed(42)
    color_masks = [
        np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_mask)
    ]
    for idx in range(num_mask):
        idx = -(idx + 1)
        cur_cate = cate_label[idx]
        if cur_cate == 0:
            cur_mask = seg_label[idx, :, :]
            cur_mask = mmcv.imresize(cur_mask, (w, h))
            cur_mask = (cur_mask > 0.5).astype(np.uint8)
            if cur_mask.sum() == 0:
                continue
            color_mask = color_masks[idx]
            cur_mask_bool = cur_mask.astype(bool)
            mask[cur_mask_bool] = color_mask
            cur_score = cate_score[idx]

    if out_file is None:
        return mask
    else:
        mmcv.imwrite(mask, out_file)
def get_seg_masks(self,
                  mask_pred,
                  det_bboxes,
                  det_labels,
                  rcnn_test_cfg,
                  ori_shape,
                  img_shape,
                  scale_factor,
                  rescale,
                  return_rect=False):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().detach().numpy()
    assert isinstance(mask_pred, np.ndarray)
    # when enabling mixed precision training, mask_pred may be float16
    # numpy array
    mask_pred = mask_pred.astype(np.float32)

    if return_rect:
        rects = []
    else:
        cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().detach().numpy()[:, :4]
    labels = det_labels.cpu().detach().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h, img_w = img_shape[:2]
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            mask_pred_ = mask_pred[i, 0, :, :]
        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        try:
            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        except Exception:
            print(bbox, img_h, img_w)
            exit()

        if return_rect:
            # fit a rotated rectangle to the mask pixels; np.where yields
            # (row, col) int64, so cast for OpenCV and flip back to (x, y)
            cnt = np.stack(np.where(im_mask == 1)).T.astype(np.int32)
            rect = cv2.boxPoints(cv2.minAreaRect(cnt))
            rect = np.array(rect)[:, ::-1].reshape(-1)
            rects.append(rect)
        else:
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)

    if return_rect:
        return rects
    return cls_segms
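# --- Usage sketch (not part of the original code) ---
# The return_rect branch converts a binary mask to a rotated rectangle via
# cv2.minAreaRect + cv2.boxPoints. Demo on a synthetic tilted blob; note the
# (row, col) -> (x, y) flip done with [:, ::-1], mirroring the code above.
import cv2
import numpy as np

im_mask = np.zeros((80, 80), dtype=np.uint8)
cv2.ellipse(im_mask, (40, 40), (25, 10), 30, 0, 360, 1, -1)

cnt = np.stack(np.where(im_mask == 1)).T.astype(np.float32)  # (N, 2), (y, x)
rect = cv2.boxPoints(cv2.minAreaRect(cnt))                   # 4 corners
rect = np.array(rect)[:, ::-1].reshape(-1)                   # back to x1,y1,...
print(rect)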
def extract_frame(vid_item, dev_id=0):
    """Generate optical flow using dense flow.

    Args:
        vid_item (list): Video item containing video full path,
            video (short) path, video id.
        dev_id (int): Device id.

    Returns:
        bool: Whether generate optical flow successfully.
    """
    full_path, vid_path, vid_id, method, task = vid_item
    if '/' in vid_path:
        act_name = osp.basename(osp.dirname(vid_path))
        out_full_path = osp.join(args.out_dir, act_name)
    else:
        out_full_path = args.out_dir

    if task == 'rgb':
        if args.use_opencv:
            # Unlike denseflow, OpenCV will not create a sub directory
            # with the video name, so do it manually.
            video_name = osp.splitext(osp.basename(vid_path))[0]
            out_full_path = osp.join(out_full_path, video_name)

            vr = mmcv.VideoReader(full_path)
            for i in range(len(vr)):
                if vr[i] is not None:
                    frame = vr[i]  # decode once and reuse
                    h, w, _ = np.shape(frame)  # frames are (H, W, C)
                    if args.new_short == 0:
                        out_img = mmcv.imresize(
                            frame, (args.new_width, args.new_height))
                    else:
                        # resize the short side to new_short
                        if min(h, w) == w:
                            new_w = args.new_short
                            new_h = int((new_w / w) * h)
                        else:
                            new_h = args.new_short
                            new_w = int((new_h / h) * w)
                        # mmcv.imresize takes the target size as (w, h)
                        out_img = mmcv.imresize(frame, (new_w, new_h))
                    mmcv.imwrite(out_img,
                                 f'{out_full_path}/img_{i + 1:05d}.jpg')
                else:
                    warnings.warn(
                        'Length inconsistent! '
                        f'Early stop with {i + 1} out of {len(vr)} frames.')
                    break
        else:
            if args.new_short == 0:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -nw={args.new_width} -nh={args.new_height} -v')
            else:
                cmd = osp.join(
                    f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                    f' -ns={args.new_short} -v')
            os.system(cmd)
    elif task == 'flow':
        if args.new_short == 0:
            cmd = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        os.system(cmd)
    else:
        if args.new_short == 0:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -nw={args.new_width} -nh={args.new_height} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -nw={args.new_width} -nh={args.new_height} -v')
        else:
            cmd_rgb = osp.join(
                f"denseflow '{full_path}' -b=20 -s=0 -o='{out_full_path}'"
                f' -ns={args.new_short} -v')
            cmd_flow = osp.join(
                f"denseflow '{full_path}' -a={method} -b=20 -s=1 -o='{out_full_path}'"  # noqa: E501
                f' -ns={args.new_short} -v')
        os.system(cmd_rgb)
        os.system(cmd_flow)

    print(f'{task} {vid_id} {vid_path} {method} done')
    sys.stdout.flush()
    return True
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    eval_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                # drop any extra channels and convert to BGR for display
                if img.shape[-1] > 3:
                    img_show = img[:h, :w, :3][:, :, ::-1]
                else:
                    img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[j][:2],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            if len(result[0]) == 3:
                result = [(bbox_results, encode_mask_results(mask_results),
                           poly_points)
                          for bbox_results, mask_results, poly_points
                          in result]
            else:
                result = [(bbox_results, encode_mask_results(mask_results))
                          for bbox_results, mask_results in result]
        results.extend(result)
        eval_results.extend([res[:2] for res in result])

        for _ in range(batch_size):
            prog_bar.update()
    return results, eval_results
def __call__(self,
             img_group,
             scale,
             crop_history=None,
             flip=False,
             rotate=None,
             keep_ratio=True,
             dropout_prob=None,
             div_255=False,
             transpose=True,
             stack=True):
    # 1. rescale
    if keep_ratio:
        tuple_list = [
            mmcv.imrescale(img, scale, return_scale=True)
            for img in img_group
        ]
        img_group, scale_factors = list(zip(*tuple_list))
        scale_factor = scale_factors[0]
    else:
        tuple_list = [
            mmcv.imresize(img, scale, return_scale=True)
            for img in img_group
        ]
        img_group, w_scales, h_scales = list(zip(*tuple_list))
        scale_factor = np.array(
            [w_scales[0], h_scales[0], w_scales[0], h_scales[0]],
            dtype=np.float32)

    # 2. rotate
    if rotate is not None:
        img_group = [mmcv.imrotate(img, rotate) for img in img_group]

    # 3. crop (if necessary)
    if crop_history is not None:
        self.op_crop = GroupCrop(crop_history)
    if self.op_crop is not None:
        img_group, crop_quadruple = self.op_crop(img_group)
    else:
        crop_quadruple = None
    img_shape = img_group[0].shape

    # 4. flip
    if flip:
        img_group = [mmcv.imflip(img) for img in img_group]

    # 5a. extra augmentation
    if self.extra_augm is not None:
        img_group = self.extra_augm(img_group)

    # 5b. coarse dropout
    if self.dropout_scale is not None and dropout_prob is not None \
            and dropout_prob > 0.0:
        dropout_mask = self._coarse_dropout_mask(img_group[0].shape,
                                                 dropout_prob,
                                                 self.dropout_scale)
        img_group = [img * dropout_mask for img in img_group]

    # 6a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(img, 0, 255, False) for img in img_group
        ]

    # 6b. normalize
    if self.mean is not None and self.std is not None:
        img_group = [
            mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
            for img in img_group
        ]
    elif self.to_rgb:
        img_group = [mmcv.bgr2rgb(img) for img in img_group]

    # 7. pad
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(img, self.size_divisor)
            for img in img_group
        ]
        pad_shape = img_group[0].shape
    else:
        pad_shape = img_shape

    # 8. transpose
    if transpose:
        img_group = [img.transpose((2, 0, 1)) for img in img_group]

    # 9. stack into numpy.array
    if stack:
        img_group = np.stack(img_group, axis=0)

    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                  ori_shape, scale_factor, rescale):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size
        scale_factor: scale factor for the image
        rescale (bool): whether masks are mapped back to the original
            image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().numpy()
    assert isinstance(mask_pred, np.ndarray)
    # when enabling mixed precision training, mask_pred may be float16
    # numpy array
    mask_pred = mask_pred.astype(np.float32)

    cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    for i in range(bboxes.shape[0]):
        if not isinstance(scale_factor, (float, np.ndarray)):
            scale_factor = scale_factor.cpu().numpy()
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            mask_pred_ = mask_pred[i, 0, :, :]

        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        if rcnn_test_cfg.get('crop_mask', False):
            im_mask = bbox_mask
        else:
            im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
            im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask

        if rcnn_test_cfg.get('rle_mask_encode', True):
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)
        else:
            cls_segms[label - 1].append(im_mask)

    return cls_segms
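The core of the method above is a resize-threshold-paste step; here it is isolated as a standalone sketch with illustrative shapes and a 0.5 threshold standing in for rcnn_test_cfg.mask_thr_binary.

import mmcv
import numpy as np

mask_prob = np.random.rand(28, 28).astype(np.float32)  # stand-in for one ROI's mask probabilities
bbox = np.array([40, 30, 120, 90], dtype=np.int32)     # x1, y1, x2, y2 in image coordinates
img_h, img_w = 200, 320

w = max(bbox[2] - bbox[0] + 1, 1)
h = max(bbox[3] - bbox[1] + 1, 1)
# resize the low-res map to the box size, binarize, paste into a full canvas
bbox_mask = (mmcv.imresize(mask_prob, (w, h)) > 0.5).astype(np.uint8)
im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask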
def prepare_test_img(self, idx):
    """Prepare an image for testing (multi-scale and flipping)."""
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix[:-11], img_info['filename']))
    # corruption
    if self.corruption is not None:
        img = corrupt(img,
                      severity=self.corruption_severity,
                      corruption_name=self.corruption)
    # load proposals if necessary
    if self.proposals is not None:
        proposal = self.proposals[idx][:self.num_max_proposals]
        if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposal.shape))
    else:
        proposal = None

    # get img_refer from the first frame
    first_frame_idx = img_info["first_frame"]
    refer_info = self.img_infos[first_frame_idx]
    refer_ann = self.get_ann_info(first_frame_idx)
    img_refer = mmcv.imread(
        osp.join(self.img_prefix[:-11], refer_info['filename']))
    # crop the bbox
    img_refer = torch.squeeze(
        torch.Tensor(mmcv.imcrop(img_refer, refer_ann["bboxes"])))
    # resize to refer_scale
    img_refer = torch.Tensor(
        mmcv.imresize(np.float32(img_refer),
                      self.refer_scale,
                      return_scale=False)).permute(2, 0, 1)

    def prepare_single(img, scale, flip, proposal=None):
        _img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, scale, flip, keep_ratio=self.resize_keep_ratio)
        _img = to_tensor(_img)
        _img_meta = dict(ori_shape=(img_info['height'], img_info['width'], 3),
                         img_shape=img_shape,
                         pad_shape=pad_shape,
                         scale_factor=scale_factor,
                         flip=flip)
        if proposal is not None:
            if proposal.shape[1] == 5:
                score = proposal[:, 4, None]
                proposal = proposal[:, :4]
            else:
                score = None
            _proposal = self.bbox_transform(proposal, img_shape, scale_factor,
                                            flip)
            _proposal = np.hstack([_proposal, score
                                   ]) if score is not None else _proposal
            _proposal = to_tensor(_proposal)
        else:
            _proposal = None
        return _img, _img_meta, _proposal

    imgs = []
    img_metas = []
    img_refers = []
    proposals = []
    for scale in self.img_scales:
        _img, _img_meta, _proposal = prepare_single(img, scale, False,
                                                    proposal)
        imgs.append(_img)
        img_metas.append(DC(_img_meta, cpu_only=True))
        img_refers.append(DC(to_tensor(img_refer), stack=True))
        proposals.append(_proposal)
        if self.flip_ratio > 0:
            _img, _img_meta, _proposal = prepare_single(img, scale, True,
                                                        proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            img_refers.append(DC(to_tensor(img_refer), stack=True))
            proposals.append(_proposal)
    data = dict(img=imgs, img_meta=img_metas, img_refer=img_refers)
    if self.proposals is not None:
        data['proposals'] = proposals
    return data
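The reference-branch preparation above reduces to a crop-then-resize; a small sketch, assuming a hypothetical image path, a single (x1, y1, x2, y2) box, and an illustrative (127, 127) value for refer_scale.

import mmcv
import numpy as np
import torch

img_refer = mmcv.imread('first_frame.jpg')        # hypothetical path
refer_bbox = np.array([10, 20, 110, 140])         # x1, y1, x2, y2
patch = mmcv.imcrop(img_refer, refer_bbox)        # crop the reference box
patch = mmcv.imresize(np.float32(patch), (127, 127))
patch = torch.from_numpy(patch).permute(2, 0, 1)  # HWC -> CHW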
def main():
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # resize frames so the short side is 256
    new_w, new_h = mmcv.rescale_size((w, h), (256, np.inf))
    frames = [mmcv.imresize(img, (new_w, new_h)) for img in original_frames]
    w_ratio, h_ratio = new_w / w, new_h / h

    # Get clip_len, frame_interval and calculate the center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)
    val_pipeline = config.data.val.pipeline

    sampler = [x for x in val_pipeline if x['type'] == 'SampleAVAFrames'][0]
    clip_len, frame_interval = sampler['clip_len'], sampler['frame_interval']
    window_size = clip_len * frame_interval
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    # Note that timestamps are 1-based here
    timestamps = np.arange(window_size // 2, num_frame + 1 - window_size // 2,
                           args.predict_stepsize)

    # Load label_map
    label_map = load_label_map(args.label_map)
    try:
        if config['data']['train']['custom_classes'] is not None:
            label_map = {
                idx + 1: label_map[cls]
                for idx, cls in enumerate(
                    config['data']['train']['custom_classes'])
            }
    except KeyError:
        pass

    # Get human detection results
    center_frames = [frame_paths[ind - 1] for ind in timestamps]
    human_detections = detection_inference(args, center_frames)
    for i in range(len(human_detections)):
        det = human_detections[i]
        det[:, 0:4:2] *= w_ratio
        det[:, 1:4:2] *= h_ratio
        human_detections[i] = torch.from_numpy(det[:, :4]).to(args.device)

    # Get img_norm_cfg
    img_norm_cfg = config['img_norm_cfg']
    if 'to_rgb' not in img_norm_cfg and 'to_bgr' in img_norm_cfg:
        to_bgr = img_norm_cfg.pop('to_bgr')
        img_norm_cfg['to_rgb'] = to_bgr
    img_norm_cfg['mean'] = np.array(img_norm_cfg['mean'])
    img_norm_cfg['std'] = np.array(img_norm_cfg['std'])

    # Build the spatiotemporal detection model
    try:
        # In our spatiotemporal detection demo, different actions should have
        # the same number of bboxes.
        config['model']['test_cfg']['rcnn']['action_thr'] = 0.0
    except KeyError:
        pass
    config.model.backbone.pretrained = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    model.to(args.device)
    model.eval()

    predictions = []

    print('Performing SpatioTemporal Action Detection for each clip')
    assert len(timestamps) == len(human_detections)
    prog_bar = mmcv.ProgressBar(len(timestamps))
    for timestamp, proposal in zip(timestamps, human_detections):
        if proposal.shape[0] == 0:
            predictions.append(None)
            continue

        start_frame = timestamp - (clip_len // 2 - 1) * frame_interval
        frame_inds = start_frame + np.arange(0, window_size, frame_interval)
        frame_inds = list(frame_inds - 1)
        imgs = [frames[ind].astype(np.float32) for ind in frame_inds]
        _ = [mmcv.imnormalize_(img, **img_norm_cfg) for img in imgs]
        # THWC -> CTHW -> 1CTHW
        input_array = np.stack(imgs).transpose((3, 0, 1, 2))[np.newaxis]
        input_tensor = torch.from_numpy(input_array).to(args.device)

        with torch.no_grad():
            result = model(
                return_loss=False,
                img=[input_tensor],
                img_metas=[[dict(img_shape=(new_h, new_w))]],
                proposals=[[proposal]])
            result = result[0]
            # one (initially empty) prediction list per proposal
            prediction = [[] for _ in range(proposal.shape[0])]
            # perform action score thresholding
            for i in range(len(result)):
                if i + 1 not in label_map:
                    continue
                for j in range(proposal.shape[0]):
                    if result[i][j, 4] > args.action_score_thr:
                        prediction[j].append(
                            (label_map[i + 1], result[i][j, 4]))
            predictions.append(prediction)
        prog_bar.update()

    results = []
    for human_detection, prediction in zip(human_detections, predictions):
        results.append(pack_result(human_detection, prediction, new_h, new_w))

    def dense_timestamps(timestamps, n):
        """Make it nx frames."""
        old_frame_interval = (timestamps[1] - timestamps[0])
        start = timestamps[0] - old_frame_interval / n * (n - 1) / 2
        new_frame_inds = np.arange(
            len(timestamps) * n) * old_frame_interval / n + start
        return new_frame_inds.astype(np.int64)

    dense_n = int(args.predict_stepsize / args.output_stepsize)
    frames = [
        cv2.imread(frame_paths[i - 1])
        for i in dense_timestamps(timestamps, dense_n)
    ]
    print('Performing visualization')
    vis_frames = visualize(frames, results)
    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                fps=args.output_fps)
    vid.write_videofile(args.out_filename)

    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)
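A worked example of the clip-sampling arithmetic in the loop above, assuming clip_len=8 and frame_interval=8 (so window_size=64): for a given 1-based center timestamp, the sampled 0-based frame indices cover the window symmetrically around it.

import numpy as np

clip_len, frame_interval = 8, 8
window_size = clip_len * frame_interval  # 64
timestamp = 100                          # 1-based center frame

start_frame = timestamp - (clip_len // 2 - 1) * frame_interval  # 76
frame_inds = start_frame + np.arange(0, window_size, frame_interval)
frame_inds = frame_inds - 1              # convert to 0-based
print(frame_inds)                        # [ 75  83  91  99 107 115 123 131]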
def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]
    img = mmcv.imread(osp.join(self.img_prefix[:-11], img_info['filename']))
    # corruption
    if self.corruption is not None:
        img = corrupt(img,
                      severity=self.corruption_severity,
                      corruption_name=self.corruption)
    # load proposals if necessary
    if self.proposals is not None:
        proposals = self.proposals[idx][:self.num_max_proposals]
        # TODO: Handle empty proposals properly. Currently images with
        # no proposals are just ignored, but they can be used for
        # training in concept.
        if len(proposals) == 0:
            return None
        if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        if proposals.shape[1] == 5:
            scores = proposals[:, 4, None]
            proposals = proposals[:, :4]
        else:
            scores = None

    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_crowd:
        gt_bboxes_ignore = ann['bboxes_ignore']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0 and self.skip_img_without_anno:
        warnings.warn('Skip the image "%s" that has no valid gt bbox' %
                      osp.join(self.img_prefix, img_info['filename']))
        return None

    # apply transforms
    flip = True if np.random.rand() < self.flip_ratio else False
    # randomly sample a scale
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()

    # get img_refer from the first frame
    first_frame_idx = img_info["first_frame"]
    refer_info = self.img_infos[first_frame_idx]
    refer_ann = self.get_ann_info(first_frame_idx)
    img_refer = mmcv.imread(
        osp.join(self.img_prefix[:-11], refer_info['filename']))
    # crop the bbox
    img_refer = torch.squeeze(
        torch.Tensor(mmcv.imcrop(img_refer, refer_ann["bboxes"])))
    # resize to refer_scale
    img_refer = torch.Tensor(
        mmcv.imresize(np.float32(img_refer),
                      self.refer_scale,
                      return_scale=False)).permute(2, 0, 1)

    if self.with_seg:
        gt_seg = mmcv.imread(osp.join(
            self.seg_prefix, img_info['filename'].replace('jpg', 'png')),
            flag='unchanged')
        gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
        gt_seg = mmcv.imrescale(gt_seg,
                                self.seg_scale_factor,
                                interpolation='nearest')
        gt_seg = gt_seg[None, ...]
    if self.proposals is not None:
        proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                        flip)
        proposals = np.hstack([proposals, scores
                               ]) if scores is not None else proposals
    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)
    if self.with_crowd:
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)
    if self.with_mask:
        gt_masks = self.mask_transform(ann['masks'], pad_shape, scale_factor,
                                       flip)

    ori_shape = (img_info['height'], img_info['width'], 3)
    img_meta = dict(ori_shape=ori_shape,
                    img_shape=img_shape,
                    pad_shape=pad_shape,
                    scale_factor=scale_factor,
                    flip=flip)

    data = dict(img=DC(to_tensor(img), stack=True),
                img_meta=DC(img_meta, cpu_only=True),
                gt_bboxes=DC(to_tensor(gt_bboxes)),
                img_refer=DC(to_tensor(img_refer), stack=True))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if self.with_crowd:
        data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
    if self.with_mask:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)

    # -------------------- offline ray label generation --------------------
    self.center_sample = True
    self.use_mask_center = True
    self.radius = 1.5
    featmap_sizes = self.get_featmap_size(pad_shape)
    # featmap_sizes: [[32, 32], [16, 16], [8, 8]]
    num_levels = len(self.strides)
    all_level_points = self.get_points(featmap_sizes)
    # level 0 points: torch.Size([1024, 2])
    # level 1 points: torch.Size([256, 2])
    # level 2 points: torch.Size([64, 2])
    self.num_points_per_level = [i.size()[0] for i in all_level_points]

    expanded_regress_ranges = [
        all_level_points[i].new_tensor(
            self.regress_ranges[i])[None].expand_as(all_level_points[i])
        for i in range(num_levels)
    ]
    concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
    concat_points = torch.cat(all_level_points, 0)
    gt_masks = gt_masks[:len(gt_bboxes)]
    gt_bboxes = torch.Tensor(gt_bboxes)
    gt_labels = torch.Tensor(gt_labels)

    _labels, _bbox_targets, _mask_targets = self.polar_target_single(
        gt_bboxes, gt_masks, gt_labels, concat_points, concat_regress_ranges,
        self.num_polar)

    data['_gt_labels'] = DC(_labels)
    data['_gt_bboxes'] = DC(_bbox_targets)
    data['_gt_masks'] = DC(_mask_targets)
    # -------------------- offline ray label generation --------------------

    return data
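A hypothetical smoke test for the method above: `dataset` is assumed to be an instance of this dataset class; every value in the returned dict is a DataContainer, so the underlying tensors are reached through `.data`.

sample = dataset.prepare_train_img(0)
if sample is not None:  # None is returned for images without valid gt boxes
    print(sample['img'].data.shape)         # (C, H, W) image tensor
    print(sample['gt_bboxes'].data.shape)   # (num_gts, 4)
    print(sample['_gt_labels'].data.shape)  # per-point labels from polar_target_single
    print(sample['_gt_masks'].data.shape)   # per-point polar ray targets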
def single_gpu_test_processed_rect_img(model, data_loader, show=False,
                                       out_dir=None, show_score_thr=0.3):
    print('clw: using single_gpu_test_processed_rect_img()')
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        # The inputs are rectangular crops of the original image; shift the
        # detected boxes back to full-image coordinates using the crop origin
        # encoded in the filename suffix ('..._<x_left>_<y_up>.jpg').
        img_name = data['img_metas'][0].data[0][0]['ori_filename']
        crop_origin = img_name[:-4].split('_')[-2:]
        x_rect_left = int(crop_origin[0])
        y_rect_up = int(crop_origin[1])
        for cls_idx in range(len(result[0])):
            bboxes = result[0][cls_idx][:, :4]  # (n, 4)
            if bboxes.size == 0:
                continue
            shifted = []
            for xyxy in bboxes:
                x1 = xyxy[0] + x_rect_left
                y1 = xyxy[1] + y_rect_up
                x2 = xyxy[2] + x_rect_left
                y2 = xyxy[3] + y_rect_up
                shifted.append(np.array((x1, y1, x2, y2)))
            result[0][cls_idx][:, :4] = np.array(shifted)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for j, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result[j],
                                         show=show,
                                         out_file=out_file,
                                         score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
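The filename-driven offset correction above, isolated as a sketch; the '..._<x_left>_<y_up>.jpg' naming scheme is assumed from how the function parses ori_filename, and the detection array is illustrative.

import numpy as np

img_name = 'patch_100_200.jpg'                 # hypothetical crop filename
x_left, y_up = map(int, img_name[:-4].split('_')[-2:])
dets = np.array([[10., 20., 50., 60., 0.9]])   # x1, y1, x2, y2, score
dets[:, [0, 2]] += x_left                      # shift x coordinates
dets[:, [1, 3]] += y_up                        # shift y coordinates
print(dets)                                    # [[110. 220. 150. 260.   0.9]]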
def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                  ori_shape, scale_factor, rescale):
    """Get segmentation masks from mask_pred and bboxes.

    Args:
        mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
            For single-scale testing, mask_pred is the direct output of
            model, whose type is Tensor, while for multi-scale testing,
            it will be converted to numpy array outside of this method.
        det_bboxes (Tensor): shape (n, 4/5)
        det_labels (Tensor): shape (n, )
        rcnn_test_cfg (dict): rcnn testing config
        ori_shape: original image size

    Returns:
        list[list]: encoded masks
    """
    if isinstance(mask_pred, torch.Tensor):
        mask_pred = mask_pred.sigmoid().cpu().numpy()
    assert isinstance(mask_pred, np.ndarray)

    # No mask is predicted for the background class, so this is 80 lists
    # for COCO.
    cls_segms = [[] for _ in range(self.num_classes - 1)]
    bboxes = det_bboxes.cpu().numpy()[:, :4]
    labels = det_labels.cpu().numpy() + 1

    if rescale:
        img_h, img_w = ori_shape[:2]
    else:
        img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
        img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
        scale_factor = 1.0

    # iterate over every detected object
    for i in range(bboxes.shape[0]):
        bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
        # Although masks are generated for all classes, the one kept is
        # chosen by the detection label, i.e. by the classification score
        # of the detection stage, which is not always accurate.
        label = labels[i]
        w = max(bbox[2] - bbox[0] + 1, 1)
        h = max(bbox[3] - bbox[1] + 1, 1)

        if not self.class_agnostic:
            # A mask is predicted per class; take the 28x28 map of the
            # current class for further processing.
            mask_pred_ = mask_pred[i, label, :, :]
        else:
            # interface for class-agnostic (foreground/background) masks
            mask_pred_ = mask_pred[i, 0, :, :]

        # create a canvas of the original image size
        im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

        # Interpolate the 28x28 map to the bbox size, then binarize it with
        # the configured threshold.
        bbox_mask = mmcv.imresize(mask_pred_, (w, h))
        bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
            np.uint8)
        # Paste the binarized mask into the canvas; only the box region is
        # written, so im_mask is a pixel mask of the original image size,
        # 1 on segmented pixels and 0 everywhere else.
        im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
        rle = mask_util.encode(
            np.array(im_mask[:, :, np.newaxis], order='F'))[0]
        cls_segms[label - 1].append(rle)

    return cls_segms
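A minimal round-trip sketch of the RLE encoding used in the final step, via pycocotools; the tiny mask is illustrative.

import numpy as np
import pycocotools.mask as mask_util

im_mask = np.zeros((4, 4), dtype=np.uint8)
im_mask[1:3, 1:3] = 1
# encode() expects Fortran-ordered uint8 arrays of shape (h, w, n)
rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
print(mask_util.area(rle))                   # 4
assert (mask_util.decode(rle) == im_mask).all()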