Example #1
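A COCO dataset class for a panoptic segmentation pipeline. The constructor maps each image set to its image directory and annotation file, selects the panoptic ground-truth json, and builds the roidb (optionally merging several image sets and appending flipped entries); __getitem__ assembles the image blob plus RPN and FCN-head targets, and the evaluate_* methods score instance masks and semantic segmentations.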
class coco(BaseDataset):
    def __init__(self,
                 image_sets,
                 flip=False,
                 proposal_files=None,
                 phase='train',
                 result_path=''):

        super(coco, self).__init__()

        image_dirs = {
            'train2014':
            os.path.join(config.dataset.dataset_path, 'coco_train2014'),
            'val2014':
            os.path.join(config.dataset.dataset_path, 'coco_val2014'),
            'minival2014':
            os.path.join(config.dataset.dataset_path, 'coco_val2014'),
            'valminusminival2014':
            os.path.join(config.dataset.dataset_path, 'coco_val2014'),
            'test2015':
            os.path.join(config.dataset.dataset_path, 'coco_test2015'),
            'test-dev2015':
            os.path.join(config.dataset.dataset_path, 'coco_test2015'),
            'train2017':
            os.path.join(config.dataset.dataset_path, 'images', 'train2017'),
            'val2017':
            os.path.join(config.dataset.dataset_path, 'images', 'val2017'),
            'test-dev2017':
            os.path.join(config.dataset.dataset_path, 'images', 'test2017'),
        }

        anno_files = {
            'train2014': 'instances_train2014.json',
            'val2014': 'instances_val2014.json',
            'minival2014': 'instances_minival2014.json',
            'valminusminival2014': 'instances_valminusminival2014.json',
            'test2015': 'image_info_test2015.json',
            'test-dev2015': 'image_info_test-dev2015.json',
            'train2017': 'instances_train2017.json',
            'val2017': 'instances_val2017.json',
            'test-dev2017': 'image_info_test-dev2017.json',
        }

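        # test-dev2017 ships no panoptic ground truth, so only its image-info
        # json is recorded; every other split points at the val2017 panoptic GT.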
        if image_sets[0] == 'test-dev2017':
            self.panoptic_json_file = os.path.join(
                config.dataset.dataset_path, 'annotations',
                'image_info_test-dev2017.json')
        else:
            self.panoptic_json_file = os.path.join(
                config.dataset.dataset_path, 'annotations',
                'panoptic_val2017_stff.json')
            self.panoptic_gt_folder = os.path.join(config.dataset.dataset_path,
                                                   'annotations',
                                                   'panoptic_val2017')

        if proposal_files is None:
            proposal_files = [None] * len(image_sets)

        if phase == 'train' and len(image_sets) > 1:
            # combine multiple datasets
            roidbs = []
            for image_set, proposal_file in zip(image_sets, proposal_files):
                dataset = JsonDataset('coco_' + image_set,
                                      image_dir=image_dirs[image_set],
                                      anno_file=os.path.join(
                                          config.dataset.dataset_path,
                                          'annotations',
                                          anno_files[image_set]))
                roidb = dataset.get_roidb(
                    gt=True,
                    proposal_file=proposal_file,
                    crowd_filter_thresh=config.train.crowd_filter_thresh)
                if flip:
                    if logger:
                        logger.info(
                            'Appending horizontally-flipped training examples...'
                        )
                    extend_with_flipped_entries(roidb, dataset)
                roidbs.append(roidb)
            roidb = roidbs[0]
            for r in roidbs[1:]:
                roidb.extend(r)
            roidb = filter_for_training(roidb)
            add_bbox_regression_targets(roidb)

        else:
            assert len(image_sets) == 1
            self.dataset = JsonDataset('coco_' + image_sets[0],
                                       image_dir=image_dirs[image_sets[0]],
                                       anno_file=os.path.join(
                                           config.dataset.dataset_path,
                                           'annotations',
                                           anno_files[image_sets[0]]))
            roidb = self.dataset.get_roidb(
                gt=True,
                proposal_file=proposal_files[0],
                crowd_filter_thresh=config.train.crowd_filter_thresh
                if phase != 'test' else 0)
            if flip:
                if logger:
                    logger.info(
                        'Appending horizontally-flipped training examples...')
                extend_with_flipped_entries(roidb, self.dataset)
            if phase != 'test':
                roidb = filter_for_training(roidb)
                add_bbox_regression_targets(roidb)

        self.roidb = roidb
        self.phase = phase
        self.flip = flip
        self.result_path = result_path
        self.num_classes = 81

    def __len__(self):
        return len(self.roidb)

    def __getitem__(self, index):
        blob = defaultdict(list)
        im_blob, im_scales = self.get_image_blob([self.roidb[index]])
        if config.network.has_rpn:
            if self.phase != 'test':
                add_rpn_blobs(blob, im_scales, [self.roidb[index]])
                data = {'data': im_blob, 'im_info': blob['im_info']}
                label = {'roidb': blob['roidb'][0]}
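                # Copy the per-FPN-stride RPN targets out of the '_wide_' blobs.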
                for stride in config.network.rpn_feat_stride:
                    label.update({
                        'rpn_labels_fpn{}'.format(stride):
                        blob['rpn_labels_int32_wide_fpn{}'.format(
                            stride)].astype(np.int64),
                        'rpn_bbox_targets_fpn{}'.format(stride):
                        blob['rpn_bbox_targets_wide_fpn{}'.format(stride)],
                        'rpn_bbox_inside_weights_fpn{}'.format(stride):
                        blob['rpn_bbox_inside_weights_wide_fpn{}'.format(
                            stride)],
                        'rpn_bbox_outside_weights_fpn{}'.format(stride):
                        blob['rpn_bbox_outside_weights_wide_fpn{}'.format(
                            stride)]
                    })
            else:
                data = {
                    'data':
                    im_blob,
                    'im_info':
                    np.array(
                        [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]],
                        np.float32)
                }
                label = None
        else:
            raise NotImplementedError
        if config.network.has_fcn_head:
            if self.phase != 'test':
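                # Derive the semantic GT path from the image path by string
                # substitution, then mirror the flip and rescale of the image.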
                seg_gt = np.array(
                    Image.open(self.roidb[index]['image'].replace(
                        'images', 'annotations'
                    ).replace(
                        'train2017',
                        'panoptic_train2017_semantic_trainid_stff').replace(
                            'val2017',
                            'panoptic_val2017_semantic_trainid_stff').replace(
                                'jpg', 'png')))
                if self.roidb[index]['flipped']:
                    seg_gt = np.fliplr(seg_gt)
                seg_gt = cv2.resize(seg_gt,
                                    None,
                                    None,
                                    fx=im_scales[0],
                                    fy=im_scales[0],
                                    interpolation=cv2.INTER_NEAREST)
                label.update({'seg_gt': seg_gt})
                label.update({
                    'gt_classes':
                    label['roidb']['gt_classes'][label['roidb']['is_crowd'] ==
                                                 0]
                })
                label.update({
                    'mask_gt':
                    np.zeros((len(label['gt_classes']), im_blob.shape[-2],
                              im_blob.shape[-1]))
                })
                idx = 0
                for i in range(len(label['roidb']['gt_classes'])):
                    if label['roidb']['is_crowd'][i] != 0:
                        continue
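                    # Polygon segmentations are rasterized via PIL; everything
                    # else is COCO RLE and is decoded with pycocotools.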
                    if type(label['roidb']['segms'][i]) is list and type(
                            label['roidb']['segms'][i][0]) is list:
                        img = Image.new(
                            'L',
                            (int(np.round(im_blob.shape[-1] / im_scales[0])),
                             int(np.round(im_blob.shape[-2] / im_scales[0]))),
                            0)
                        for j in range(len(label['roidb']['segms'][i])):
                            ImageDraw.Draw(img).polygon(tuple(
                                label['roidb']['segms'][i][j]),
                                                        outline=1,
                                                        fill=1)
                        label['mask_gt'][idx] = cv2.resize(
                            np.array(img),
                            None,
                            None,
                            fx=im_scales[0],
                            fy=im_scales[0],
                            interpolation=cv2.INTER_NEAREST)
                    else:
                        assert type(
                            label['roidb']['segms'][i]) is dict or type(
                                label['roidb']['segms'][i][0]) is dict
                        if type(label['roidb']['segms'][i]) is dict:
                            label['mask_gt'][idx] = cv2.resize(
                                mask_util.decode(
                                    mask_util.frPyObjects(
                                        [label['roidb']['segms'][i]],
                                        label['roidb']['segms'][i]['size'][0],
                                        label['roidb']['segms'][i]['size'][1]))
                                [:, :, 0],
                                None,
                                None,
                                fx=im_scales[0],
                                fy=im_scales[0],
                                interpolation=cv2.INTER_NEAREST)
                        else:
                            assert len(label['roidb']['segms'][i]) == 1
                            output = mask_util.decode(
                                label['roidb']['segms'][i])
                            label['mask_gt'][idx] = cv2.resize(
                                output[:, :, 0],
                                None,
                                None,
                                fx=im_scales[0],
                                fy=im_scales[0],
                                interpolation=cv2.INTER_NEAREST)
                    idx += 1
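                # Optional per-RoI segmentation targets: crop seg_gt inside each
                # scaled GT box and resize to the fixed mask resolution.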
                if config.train.fcn_with_roi_loss:
                    gt_boxes = label['roidb']['boxes'][
                        np.where((label['roidb']['gt_classes'] > 0)
                                 & (label['roidb']['is_crowd'] == 0))[0]]
                    gt_boxes = np.around(gt_boxes * im_scales[0]).astype(
                        np.int32)
                    label.update({
                        'seg_roi_gt':
                        np.zeros((len(gt_boxes), config.network.mask_size,
                                  config.network.mask_size),
                                 dtype=np.int64)
                    })
                    for i in range(len(gt_boxes)):
                        if gt_boxes[i][3] == gt_boxes[i][1]:
                            gt_boxes[i][3] += 1
                        if gt_boxes[i][2] == gt_boxes[i][0]:
                            gt_boxes[i][2] += 1
                        label['seg_roi_gt'][i] = cv2.resize(
                            seg_gt[gt_boxes[i][1]:gt_boxes[i][3],
                                   gt_boxes[i][0]:gt_boxes[i][2]],
                            (config.network.mask_size,
                             config.network.mask_size),
                            interpolation=cv2.INTER_NEAREST)
            else:
                pass

        return data, label, index

    def evaluate_masks(
        self,
        all_boxes,
        all_segms,
        output_dir,
    ):
        res_file = os.path.join(
            output_dir, 'segmentations_' + self.dataset.name + '_results.json')
        results = []
        for cls_ind, cls in enumerate(self.dataset.classes):
            if cls == '__background__':
                continue
            if cls_ind >= len(all_boxes):
                break
            cat_id = self.dataset.category_to_id_map[cls]
            results.extend(
                self.segms_results_one_category(all_boxes[cls_ind],
                                                all_segms[cls_ind], cat_id))
        if logger:
            logger.info('Writing segmentation results json to: {}'.format(
                os.path.abspath(res_file)))
        with open(res_file, 'w') as fid:
            json.dump(results, fid)
        coco_dt = self.dataset.COCO.loadRes(str(res_file))
        coco_eval = COCOeval(self.dataset.COCO, coco_dt, 'segm')
        coco_eval.evaluate()
        coco_eval.accumulate()
        self.log_detection_eval_metrics(
            coco_eval, os.path.join(output_dir, 'detection_results.txt'))
        return coco_eval

    def evaluate_ssegs(self, pred_segmentations, res_file_folder):
        self.write_segmentation_result(pred_segmentations, res_file_folder)

        confusion_matrix = np.zeros(
            (config.dataset.num_seg_classes, config.dataset.num_seg_classes))
        for i, roidb in enumerate(self.roidb):

            seg_gt = np.array(
                Image.open(self.roidb[i]['image'].replace(
                    'images', 'annotations').replace(
                        'train2017',
                        'panoptic_train2017_semantic_trainid_stff').replace(
                            'val2017',
                            'panoptic_val2017_semantic_trainid_stff').replace(
                                'jpg', 'png'))).astype(np.float32)

            seg_pathes = os.path.split(roidb['image'])
            res_image_name = seg_pathes[-1]
            res_save_path = os.path.join(res_file_folder,
                                         res_image_name + '.png')

            seg_pred = Image.open(res_save_path)

            seg_pred = np.array(
                seg_pred.resize((seg_gt.shape[1], seg_gt.shape[0]),
                                Image.NEAREST))
            ignore_index = seg_gt != 255
            seg_gt = seg_gt[ignore_index]
            seg_pred = seg_pred[ignore_index]

            confusion_matrix += self.get_confusion_matrix(
                seg_gt, seg_pred, config.dataset.num_seg_classes)

        pos = confusion_matrix.sum(1)
        res = confusion_matrix.sum(0)
        tp = np.diag(confusion_matrix)

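        # Per-class IoU = TP / (GT + pred - TP); the denominator is clipped at 1.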
        IU_array = (tp / np.maximum(1.0, pos + res - tp))
        mean_IU = IU_array.mean()

        evaluation_results = {
            'meanIU': mean_IU,
            'IU_array': IU_array,
            'confusion_matrix': confusion_matrix
        }

        def convert_confusion_matrix(confusion_matrix):
            cls_sum = confusion_matrix.sum(axis=1)
            confusion_matrix = confusion_matrix / cls_sum.reshape((-1, 1))
            return confusion_matrix

        logger.info('evaluate segmentation:')
        meanIU = evaluation_results['meanIU']
        IU_array = evaluation_results['IU_array']
        confusion_matrix = convert_confusion_matrix(
            evaluation_results['confusion_matrix'])
        logger.info('IU_array:')
        for i in range(len(IU_array)):
            logger.info('%.5f' % IU_array[i])
        logger.info('meanIU:%.5f' % meanIU)
        np.set_printoptions(precision=3, suppress=True, linewidth=200)
        import re
        confusion_matrix = re.sub(
            r'[\[\]]', '', np.array2string(confusion_matrix, separator='\t'))
        logger.info('confusion_matrix:')
        logger.info(confusion_matrix)

    def write_segmentation_result(self, segmentation_results, res_file_folder):
        """
        Write the segmentation result to result_file_folder
        :param segmentation_results: the prediction result
        :param result_file_folder: the saving folder
        :return: [None]
        """
        if not os.path.exists(res_file_folder):
            os.mkdir(res_file_folder)

        for i, roidb in enumerate(self.roidb):

            seg_pathes = os.path.split(roidb['image'])
            res_image_name = seg_pathes[-1]
            res_save_path = os.path.join(res_file_folder,
                                         res_image_name + '.png')

            segmentation_result = np.uint8(
                np.squeeze(np.copy(segmentation_results[i])))
            segmentation_result = Image.fromarray(segmentation_result)
            segmentation_result.save(res_save_path)
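
A minimal usage sketch, not part of the example above: assuming BaseDataset ultimately subclasses torch.utils.data.Dataset and `config` has already been populated, the class can be wrapped in a DataLoader. batch_size stays at 1 because get_image_blob asserts a single processed image, and the ragged 'roidb' labels need a pass-through collate_fn rather than the default tensor collation.

from torch.utils.data import DataLoader

# Hypothetical setup; set names and config values come from the surrounding project.
dataset = coco(image_sets=['train2017'], flip=True, phase='train')
loader = DataLoader(dataset, batch_size=1, shuffle=True,
                    collate_fn=lambda batch: batch[0])
for data, label, index in loader:
    break  # `data` and `label` are dicts of numpy arrays ready for the model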
Example #2
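The Cityscapes counterpart of the class above: roidb construction follows the same pattern, while __getitem__ adds a non-RPN (Fast R-CNN style) path, and the remaining methods cover mask visualization, instance-segmentation evaluation through the official cityscapesScripts devkit, and paletted semantic-segmentation output.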
class Cityscapes(BaseDataset):
    def __init__(self,
                 image_sets,
                 flip=False,
                 proposal_files=None,
                 phase='train',
                 result_path=''):

        super(Cityscapes, self).__init__()

        self.image_dirs = {
            'train': os.path.join(config.dataset.dataset_path, 'images'),
            'val': os.path.join(config.dataset.dataset_path, 'images'),
            'test': os.path.join(config.dataset.dataset_path, 'images'),
            'train_extra': os.path.join(config.dataset.dataset_path, 'images'),
            'debug': os.path.join(config.dataset.dataset_path, 'images'),
        }

        self.anno_files = {
            'train': 'instancesonly_gtFine_train.json',
            'val': 'instancesonly_gtFine_val.json',
            'test': 'image_info_test.json',
            'train_extra': 'instancesonly_gtCoarse_train_extra.json',
            'debug': 'instancesonly_gtFine_debug.json',
        }

        self.panoptic_json_file = os.path.join(config.dataset.dataset_path,
                                               'annotations',
                                               'cityscapes_fine_val.json')
        self.panoptic_gt_folder = 'data/cityscapes/panoptic'

        self.flip = flip
        self.result_path = result_path
        self.num_classes = 9
        self.phase = phase
        self.image_sets = image_sets

        if image_sets[0] == 'demoVideo':
            assert len(image_sets) == 1
            assert phase == 'test'
            with open('data/cityscapes/split/demoVideo_img.txt', 'r') as f:
                im_path = [line.strip() for line in f]
            self.roidb = [{'image': p, 'flipped': False} for p in im_path]
            return

        if proposal_files is None:
            proposal_files = [None] * len(image_sets)

        if phase == 'train' and len(image_sets) > 1:
            # combine multiple datasets
            roidbs = []
            for image_set, proposal_file in zip(image_sets, proposal_files):
                dataset = JsonDataset('cityscapes_' + image_set,
                                      image_dir=self.image_dirs[image_set],
                                      anno_file=os.path.join(
                                          config.dataset.dataset_path,
                                          'annotations',
                                          self.anno_files[image_set]))
                roidb = dataset.get_roidb(
                    gt=True,
                    proposal_file=proposal_file,
                    crowd_filter_thresh=config.train.crowd_filter_thresh)
                if flip:
                    if logger:
                        logger.info(
                            'Appending horizontally-flipped training examples...'
                        )
                    extend_with_flipped_entries(roidb, dataset)
                roidbs.append(roidb)
            roidb = roidbs[0]
            for r in roidbs[1:]:
                roidb.extend(r)
            roidb = filter_for_training(roidb)
            add_bbox_regression_targets(roidb)

        else:
            assert len(image_sets) == 1
            self.dataset = JsonDataset(
                'cityscapes_' + image_sets[0],
                image_dir=self.image_dirs[image_sets[0]],
                anno_file=os.path.join(config.dataset.dataset_path,
                                       'annotations',
                                       self.anno_files[image_sets[0]]))
            roidb = self.dataset.get_roidb(
                gt=True,
                proposal_file=proposal_files[0],
                crowd_filter_thresh=config.train.crowd_filter_thresh
                if phase != 'test' else 0)
            if flip:
                if logger:
                    logger.info(
                        'Appending horizontally-flipped training examples...')
                extend_with_flipped_entries(roidb, self.dataset)
            if phase != 'test':
                roidb = filter_for_training(roidb)
                add_bbox_regression_targets(roidb)

        self.roidb = roidb

    def __getitem__(self, index):
        blob = defaultdict(list)
        im_blob, im_scales = self.get_image_blob([self.roidb[index]])
        if config.network.has_rpn:
            if self.phase != 'test':
                add_rpn_blobs(blob, im_scales, [self.roidb[index]])
                data = {'data': im_blob, 'im_info': blob['im_info']}
                label = {'roidb': blob['roidb'][0]}
                for stride in config.network.rpn_feat_stride:
                    label.update({
                        'rpn_labels_fpn{}'.format(stride):
                        blob['rpn_labels_int32_wide_fpn{}'.format(
                            stride)].astype(np.int64),
                        'rpn_bbox_targets_fpn{}'.format(stride):
                        blob['rpn_bbox_targets_wide_fpn{}'.format(stride)],
                        'rpn_bbox_inside_weights_fpn{}'.format(stride):
                        blob['rpn_bbox_inside_weights_wide_fpn{}'.format(
                            stride)],
                        'rpn_bbox_outside_weights_fpn{}'.format(stride):
                        blob['rpn_bbox_outside_weights_wide_fpn{}'.format(
                            stride)]
                    })
            else:
                data = {
                    'data':
                    im_blob,
                    'im_info':
                    np.array(
                        [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]],
                        np.float32),
                }
                label = {'roidb': self.roidb[index]}
        else:
            if self.phase != 'test':
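                # No RPN: sample Fast R-CNN RoIs (with bbox/mask targets) directly.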
                frcn_blob = sample_rois(self.roidb[index], im_scales, 0)

                data = {
                    'data':
                    im_blob,
                    'im_info':
                    np.array(
                        [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]],
                        np.float32)
                }
                label = {
                    'rois':
                    frcn_blob['rois'].astype(np.float32),
                    'cls_label':
                    frcn_blob['labels_int32'].astype(np.int64),
                    'bbox_target':
                    frcn_blob['bbox_targets'].astype(np.float32),
                    'bbox_inside_weight':
                    frcn_blob['bbox_inside_weights'].astype(np.float32),
                    'bbox_outside_weight':
                    frcn_blob['bbox_outside_weights'].astype(np.float32),
                    'mask_rois':
                    frcn_blob['mask_rois'].astype(np.float32),
                    'mask_target':
                    frcn_blob['mask_int32'].astype(np.float32)
                }
            else:
                data = {
                    'data':
                    im_blob,
                    'rois':
                    np.hstack((np.zeros(
                        (self.roidb[index]['boxes'].shape[0], 1)),
                               self.roidb[index]['boxes'])).astype(np.float32),
                    'im_info':
                    np.array(
                        [[im_blob.shape[-2], im_blob.shape[-1], im_scales[0]]],
                        np.float32),
                    'id':
                    self.roidb[index]['id']
                }
                label = None
        if config.network.has_fcn_head:
            if self.phase != 'test':
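                # Cityscapes semantic GT sits beside the images: swap the 'images'
                # dir for 'labels' and the suffix for the trainId label map.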
                seg_gt = np.array(
                    Image.open(self.roidb[index]['image'].replace(
                        'images',
                        'labels').replace('leftImg8bit.png',
                                          'gtFine_labelTrainIds.png')))
                if self.roidb[index]['flipped']:
                    seg_gt = np.fliplr(seg_gt)
                seg_gt = cv2.resize(seg_gt,
                                    None,
                                    None,
                                    fx=im_scales[0],
                                    fy=im_scales[0],
                                    interpolation=cv2.INTER_NEAREST)
                label.update({'seg_gt': seg_gt})
                label.update({'gt_classes': label['roidb']['gt_classes']})
                label.update({
                    'mask_gt':
                    np.zeros((len(label['gt_classes']), im_blob.shape[-2],
                              im_blob.shape[-1]))
                })
                for i in range(len(label['gt_classes'])):
                    img = Image.new('L',
                                    (int(im_blob.shape[-1] / im_scales[0]),
                                     int(im_blob.shape[-2] / im_scales[0])), 0)
                    for j in range(len(label['roidb']['segms'][i])):
                        ImageDraw.Draw(img).polygon(tuple(
                            label['roidb']['segms'][i][j]),
                                                    outline=1,
                                                    fill=1)
                    label['mask_gt'][i] = cv2.resize(
                        np.array(img),
                        None,
                        None,
                        fx=im_scales[0],
                        fy=im_scales[0],
                        interpolation=cv2.INTER_NEAREST)
                if config.train.fcn_with_roi_loss:
                    gt_boxes = label['roidb']['boxes'][np.where(
                        label['roidb']['gt_classes'] > 0)[0]]
                    gt_boxes = np.around(gt_boxes * im_scales[0]).astype(
                        np.int32)
                    label.update({
                        'seg_roi_gt':
                        np.zeros((len(gt_boxes), config.network.mask_size,
                                  config.network.mask_size),
                                 dtype=np.int64)
                    })
                    for i in range(len(gt_boxes)):
                        if gt_boxes[i][3] == gt_boxes[i][1]:
                            gt_boxes[i][3] += 1
                        if gt_boxes[i][2] == gt_boxes[i][0]:
                            gt_boxes[i][2] += 1
                        label['seg_roi_gt'][i] = cv2.resize(
                            seg_gt[gt_boxes[i][1]:gt_boxes[i][3],
                                   gt_boxes[i][0]:gt_boxes[i][2]],
                            (config.network.mask_size,
                             config.network.mask_size),
                            interpolation=cv2.INTER_NEAREST)
            else:
                pass

        return data, label, index

    def get_image_blob(self, roidb):
        """Builds an input blob from the images in the roidb at the specified
        scales.
        """
        num_images = len(roidb)
        # Sample random scales to use for each image in this batch
        if self.phase == 'train':
            scale_inds = np.random.randint(0,
                                           high=len(config.train.scales),
                                           size=num_images)
        else:
            scale_inds = np.random.randint(0,
                                           high=len(config.test.scales),
                                           size=num_images)
        processed_ims = []
        im_scales = []
        for i in range(num_images):
            im = cv2.imread(roidb[i]['image'])
            assert im is not None, \
                'Failed to read image \'{}\''.format(roidb[i]['image'])
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            if self.phase == 'train':
                target_size = config.train.scales[scale_inds[i]]
                im, im_scale = self.prep_im_for_blob(
                    im, config.network.pixel_means, [target_size],
                    config.train.max_size)
            else:
                target_size = config.test.scales[scale_inds[i]]
                im, im_scale = self.prep_im_for_blob(
                    im, config.network.pixel_means, [target_size],
                    config.test.max_size)
            im_scales.append(im_scale[0])
            processed_ims.append(im[0].transpose(2, 0, 1))

        # Create a blob to hold the input images
        assert len(processed_ims) == 1
        blob = processed_ims[0]

        return blob, im_scales

    def vis_all_mask(self, all_boxes, all_masks, save_path=None):
        """
        visualize all detections in one image
        :param im_array: [b=1 c h w] in rgb
        :param detections: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        :param class_names: list of names in imdb
        :param scale: visualize the scaled image
        :return:
        """
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        from matplotlib.patches import Polygon
        import random
        import cv2
        palette = {
            'person': (220, 20, 60),
            'rider': (255, 0, 0),
            'car': (0, 0, 142),
            'truck': (0, 0, 70),
            'bus': (0, 60, 100),
            'train': (0, 80, 100),
            'motorcycle': (0, 0, 230),
            'bicycle': (119, 11, 32),
            #
            'road': (128, 64, 128),
            'sidewalk': (244, 35, 232),
            'building': (70, 70, 70),
            'wall': (102, 102, 156),
            'fence': (190, 153, 153),
            'pole': (153, 153, 153),
            'sky': (70, 130, 180),
            'traffic light': (250, 170, 30),
            'traffic sign': (220, 220, 0),
            'vegetation': (107, 142, 35),
            'terrain': (152, 251, 152)
        }
        name2id = {
            'road': 0,
            'sidewalk': 1,
            'building': 2,
            'wall': 3,
            'fence': 4,
            'pole': 5,
            'traffic light': 6,
            'traffic sign': 7,
            'vegetation': 8,
            'terrain': 9,
            'sky': 10
        }

        self.classes = [
            '__background__',
            'person',
            'bicycle',
            'train',
            'truck',
            'motorcycle',
            'bus',
            'car',
            'rider',
        ]

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)

        for i in range(len(self.roidb)):

            im = np.array(Image.open(self.roidb[i]['image']))
            fig = plt.figure(frameon=False)

            fig.set_size_inches(im.shape[1] / 200, im.shape[0] / 200)
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.axis('off')
            fig.add_axes(ax)
            ax.imshow(im)
            for j, name in enumerate(self.classes):
                if name == '__background__':
                    continue
                boxes = all_boxes[j][i]
                segms = all_masks[j][i]
                if segms == []:
                    continue
                masks = mask_util.decode(segms)
                for k in range(boxes.shape[0]):
                    score = boxes[k, -1]
                    mask = masks[:, :, k]
                    if score < 0.5:
                        continue
                    bbox = boxes[k, :]
                    ax.add_patch(
                        plt.Rectangle((bbox[0], bbox[1]),
                                      bbox[2] - bbox[0],
                                      bbox[3] - bbox[1],
                                      fill=False,
                                      edgecolor='g',
                                      linewidth=1,
                                      alpha=0.5))
                    ax.text(bbox[0],
                            bbox[1] - 2,
                            name + '{:0.2f}'.format(score).lstrip('0'),
                            fontsize=5,
                            family='serif',
                            bbox=dict(facecolor='g',
                                      alpha=0.4,
                                      pad=0,
                                      edgecolor='none'),
                            color='white')
                    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 4
                    # returns (contours, hierarchy). [-2] selects contours in both.
                    contour = cv2.findContours(
                        mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]
                    color = (palette[name][0] / 255, palette[name][1] / 255,
                             palette[name][2] / 255)
                    for c in contour:
                        ax.add_patch(
                            Polygon(c.reshape((-1, 2)),
                                    fill=True,
                                    facecolor=color,
                                    edgecolor='w',
                                    linewidth=0.8,
                                    alpha=0.5))
            if save_path is None:
                plt.show()
            else:
                fig.savefig(os.path.join(
                    save_path, '{}.png'.format(
                        self.roidb[i]['image'].split('/')[-1][:-16])),
                            dpi=200)
            plt.close('all')

    def evaluate_masks(
        self,
        all_boxes,
        all_segms,
        output_dir,
    ):
        res_file = os.path.join(
            output_dir, 'segmentations_' + self.dataset.name + '_results.json')

        os.environ['CITYSCAPES_DATASET'] = os.path.join(
            os.path.dirname(__file__), '../../data/cityscapes')
        os.environ['CITYSCAPES_RESULTS'] = os.path.join(output_dir, 'inst_seg')
        sys.path.insert(
            0,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                         '..', 'lib', 'dataset_devkit', 'cityscapesScripts'))
        sys.path.insert(
            0,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), '..',
                         '..', 'lib', 'dataset_devkit', 'cityscapesScripts',
                         'cityscapesscripts', 'evaluation'))

        # Load the Cityscapes eval script *after* setting the required env vars,
        # since the script reads their values into global variables (at load time).
        import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \
            as cityscapes_eval
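        # Clear argv so the devkit's CLI parsing does not pick up our arguments.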
        sys.argv = []

        roidb = self.dataset.get_roidb()
        for i, entry in enumerate(roidb):
            im_name = entry['image']

            basename = os.path.splitext(os.path.basename(im_name))[0]
            txtname = os.path.join(output_dir, 'inst_seg',
                                   basename + 'pred.txt')
            os.makedirs(os.path.join(output_dir, 'inst_seg'), exist_ok=True)
            with open(txtname, 'w') as fid_txt:
                for j in range(1, len(all_segms)):
                    clss = self.dataset.classes[j]
                    clss_id = cityscapes_eval.name2label[clss].id
                    segms = all_segms[j][i]
                    boxes = all_boxes[j][i]
                    if segms == []:
                        continue
                    masks = mask_util.decode(segms)

                    for k in range(boxes.shape[0]):
                        score = boxes[k, -1]
                        mask = masks[:, :, k]
                        pngname = os.path.join(
                            'seg_results', basename,
                            basename + '_' + clss + '_{}.png'.format(k))
                        # write txt
                        fid_txt.write('{} {} {}\n'.format(
                            pngname, clss_id, score))
                        # save mask
                        os.makedirs(os.path.join(output_dir, 'inst_seg',
                                                 'seg_results', basename),
                                    exist_ok=True)
                        cv2.imwrite(
                            os.path.join(output_dir, 'inst_seg', pngname),
                            mask * 255)
        cityscapes_eval.main()
        return None

    def get_pallete(self):
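        # Map the 19 Cityscapes train IDs to their official label colors and
        # flatten to the 768-entry (256 x RGB) form PIL's putpalette expects.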

        pallete_raw = np.zeros((256, 3)).astype('uint8')
        pallete = np.zeros((256, 3)).astype('uint8')

        pallete_raw[5, :] = [111, 74, 0]
        pallete_raw[6, :] = [81, 0, 81]
        pallete_raw[7, :] = [128, 64, 128]
        pallete_raw[8, :] = [244, 35, 232]
        pallete_raw[9, :] = [250, 170, 160]
        pallete_raw[10, :] = [230, 150, 140]
        pallete_raw[11, :] = [70, 70, 70]
        pallete_raw[12, :] = [102, 102, 156]
        pallete_raw[13, :] = [190, 153, 153]
        pallete_raw[14, :] = [180, 165, 180]
        pallete_raw[15, :] = [150, 100, 100]
        pallete_raw[16, :] = [150, 120, 90]
        pallete_raw[17, :] = [153, 153, 153]
        pallete_raw[18, :] = [153, 153, 153]
        pallete_raw[19, :] = [250, 170, 30]
        pallete_raw[20, :] = [220, 220, 0]
        pallete_raw[21, :] = [107, 142, 35]
        pallete_raw[22, :] = [152, 251, 152]
        pallete_raw[23, :] = [70, 130, 180]
        pallete_raw[24, :] = [220, 20, 60]
        pallete_raw[25, :] = [255, 0, 0]
        pallete_raw[26, :] = [0, 0, 142]
        pallete_raw[27, :] = [0, 0, 70]
        pallete_raw[28, :] = [0, 60, 100]
        pallete_raw[29, :] = [0, 0, 90]
        pallete_raw[30, :] = [0, 0, 110]
        pallete_raw[31, :] = [0, 80, 100]
        pallete_raw[32, :] = [0, 0, 230]
        pallete_raw[33, :] = [119, 11, 32]

        train2regular = [
            7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31,
            32, 33
        ]

        for i in range(len(train2regular)):
            pallete[i, :] = pallete_raw[train2regular[i], :]

        pallete = pallete.reshape(-1)

        # return pallete_raw
        return pallete

    def evaluate_ssegs(self, pred_segmentations, res_file_folder):
        self.write_segmentation_result(pred_segmentations, res_file_folder)

        confusion_matrix = np.zeros(
            (config.dataset.num_seg_classes, config.dataset.num_seg_classes))
        for i, roidb in enumerate(self.roidb):

            seg_gt = np.array(
                Image.open(roidb['image'].replace('images', 'labels').replace(
                    'leftImg8bit.png',
                    'gtFine_labelTrainIds.png'))).astype('float32')

            seg_pathes = os.path.split(roidb['image'].replace(
                'images', 'labels').replace('leftImg8bit.png',
                                            'gtFine_labelTrainIds.png'))
            res_image_name = seg_pathes[-1][:-len('_gtFine_labelTrainIds.png')]
            res_save_path = os.path.join(res_file_folder,
                                         res_image_name + '.png')

            seg_pred = Image.open(res_save_path)

            seg_pred = np.array(
                seg_pred.resize((seg_gt.shape[1], seg_gt.shape[0]),
                                Image.NEAREST))
            ignore_index = seg_gt != 255
            seg_gt = seg_gt[ignore_index]
            seg_pred = seg_pred[ignore_index]

            confusion_matrix += self.get_confusion_matrix(
                seg_gt, seg_pred, config.dataset.num_seg_classes)

        pos = confusion_matrix.sum(1)
        res = confusion_matrix.sum(0)
        tp = np.diag(confusion_matrix)

        IU_array = (tp / np.maximum(1.0, pos + res - tp))
        mean_IU = IU_array.mean()

        evaluation_results = {
            'meanIU': mean_IU,
            'IU_array': IU_array,
            'confusion_matrix': confusion_matrix
        }

        def convert_confusion_matrix(confusion_matrix):
            cls_sum = confusion_matrix.sum(axis=1)
            confusion_matrix = confusion_matrix / cls_sum.reshape((-1, 1))
            return confusion_matrix

        logger.info('evaluate segmentation:')
        meanIU = evaluation_results['meanIU']
        IU_array = evaluation_results['IU_array']
        confusion_matrix = convert_confusion_matrix(
            evaluation_results['confusion_matrix'])
        logger.info('IU_array:')
        for i in range(len(IU_array)):
            logger.info('%.5f' % IU_array[i])
        logger.info('meanIU:%.5f' % meanIU)
        np.set_printoptions(precision=3, suppress=True, linewidth=200)
        import re
        confusion_matrix = re.sub(
            r'[\[\]]', '', np.array2string(confusion_matrix, separator='\t'))
        logger.info('confusion_matrix:')
        logger.info(confusion_matrix)

    def write_segmentation_result(self, segmentation_results, res_file_folder):
        """
        Write the segmentation result to result_file_folder
        :param segmentation_results: the prediction result
        :param result_file_folder: the saving folder
        :return: [None]
        """
        if not os.path.exists(res_file_folder):
            os.mkdir(res_file_folder)

        pallete = self.get_pallete()
        for i, roidb in enumerate(self.roidb):

            seg_pathes = os.path.split(roidb['image'])
            res_image_name = seg_pathes[-1][:-len('_leftImg8bit.png')]
            res_save_path = os.path.join(res_file_folder,
                                         res_image_name + '.png')

            segmentation_result = np.uint8(
                np.squeeze(np.copy(segmentation_results[i])))
            segmentation_result = Image.fromarray(segmentation_result)
            segmentation_result.putpalette(pallete)
            segmentation_result.save(res_save_path)
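
Both evaluate_ssegs implementations rely on self.get_confusion_matrix, inherited from BaseDataset and not shown in these examples. A minimal sketch of such a helper, assuming integer label maps of equal shape, with rows indexed by ground truth and columns by prediction (matching the sum(1)/sum(0) usage above):

import numpy as np

def get_confusion_matrix(gt_label, pred_label, num_classes):
    # Keep only pixels whose labels are valid class indices.
    gt = gt_label.astype(np.int64).ravel()
    pred = pred_label.astype(np.int64).ravel()
    valid = (gt >= 0) & (gt < num_classes) & (pred >= 0) & (pred < num_classes)
    # Encode each (gt, pred) pair as a single index and histogram the pairs.
    index = gt[valid] * num_classes + pred[valid]
    counts = np.bincount(index, minlength=num_classes ** 2)
    return counts.reshape(num_classes, num_classes).astype(np.float64)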