Example #1
0
    def prepare_train_img(self, idx):
        """Assemble the training sample stored at position ``idx``.

        The image is read from ``self.img_prefix``, optionally passed through
        ``self.extra_aug``, randomly flipped and rescaled, and its boxes are
        transformed with the same flip/scale parameters.

        Args:
            idx: Index into ``self.img_infos``.

        Returns:
            ``None`` when the annotation contains no ground-truth boxes,
            otherwise a dict with ``img``, ``img_meta``, ``gt_bboxes`` and,
            depending on ``self.with_ignore`` / ``self.with_label``,
            ``gt_bboxes_ignore`` / ``gt_labels`` -- each wrapped in a
            ``DataContainer``.
        """
        info = self.img_infos[idx]

        # Read the raw image from disk.
        img = mmcv.imread(osp.join(self.img_prefix, info['filename']))

        annotation = self.get_ann_info(idx)
        gt_bboxes = annotation['bboxes']
        gt_labels = annotation['labels']
        if self.with_ignore:
            gt_bboxes_ignore = annotation['bboxes_ignore']

        # A sample without any ground-truth box is skipped by the caller.
        if len(gt_bboxes) == 0:
            return None

        # Augmentation 1: optional extra augmentation pipeline.
        if self.extra_aug is not None:
            augmented = self.extra_aug(img, gt_bboxes, gt_labels)
            img, gt_bboxes, gt_labels = augmented

        # Augmentation 2: random flip with probability ``flip_ratio``.
        flip = bool(np.random.rand() < self.flip_ratio)

        # Augmentation 3: pick the target scale for this sample.
        img_scale = random_scale(self.img_scales, self.multiscale_mode)

        transformed = self.img_transform(
            img=img,
            scale=img_scale,
            flip=flip,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio,
        )
        img, img_shape, pad_shape, scale_factor = transformed
        img = img.copy()

        # Boxes must follow exactly the same geometry as the image.
        geometry = dict(img_shape=img_shape,
                        scale_factor=scale_factor,
                        flip=flip)
        gt_bboxes = self.bbox_transform(bboxes=gt_bboxes, **geometry)
        if self.with_ignore:
            gt_bboxes_ignore = self.bbox_transform(bboxes=gt_bboxes_ignore,
                                                   **geometry)

        img_meta = {
            'ori_shape': (info['height'], info['width'], 3),
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': flip,
        }

        data = {
            'img': DataContainer(to_tensor(img), stack=True),
            'img_meta': DataContainer(data=img_meta, cpu_only=True),
            'gt_bboxes': DataContainer(data=to_tensor(gt_bboxes)),
        }
        if self.with_ignore:
            data['gt_bboxes_ignore'] = DataContainer(
                data=to_tensor(gt_bboxes_ignore))
        if self.with_label:
            data['gt_labels'] = DataContainer(to_tensor(gt_labels))

        return data
    def prepare_train_img(self, idx):
        """Assemble a multi-label classification training sample for ``idx``.

        The label vector has one float32 entry per name in ``self.CLASSES``;
        an entry is 1 when the annotation value for that class equals 1 and
        0 otherwise.

        Args:
            idx: Index into ``self.img_infos``.

        Returns:
            A dict with ``img`` and ``img_meta`` and, when ``self.with_label``
            is set, ``gt_labels`` -- each wrapped in a ``DataContainer``.
        """
        info = self.img_infos[idx]

        # Read the raw image from disk.
        img = mmcv.imread(osp.join(self.img_prefix, info['filename']))

        annotation = self.get_ann_info(idx)
        gt_labels = np.array(
            [int(annotation[name] == 1) for name in self.CLASSES],
            dtype=np.float32)

        # Augmentation 1: optional extra augmentation; there are no boxes in
        # this task, so ``None`` is passed and the returned boxes are ignored.
        if self.extra_aug is not None:
            img, _, gt_labels = self.extra_aug(img, None, gt_labels)

        # Augmentation 2: random flip with probability ``flip_ratio``.
        flip = bool(np.random.rand() < self.flip_ratio)

        # Augmentation 3: pick the target scale for this sample.
        img_scale = random_scale(self.img_scales, self.multiscale_mode)

        transformed = self.img_transform(
            img=img,
            scale=img_scale,
            flip=flip,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio,
        )
        img, img_shape, pad_shape, scale_factor = transformed
        img = img.copy()

        img_meta = {
            'ori_shape': (info['height'], info['width'], 3),
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': flip,
        }

        data = {
            'img': DataContainer(to_tensor(img), stack=True),
            'img_meta': DataContainer(data=img_meta, cpu_only=True),
        }
        if self.with_label:
            data['gt_labels'] = DataContainer(to_tensor(gt_labels),
                                              stack=False)

        return data
    def prepare_test_img(self, idx):
        """Load image ``idx`` and package it for inference.

        No flipping is applied and the first entry of ``self.img_scales`` is
        used as the target scale.

        Args:
            idx: Index into ``self.img_infos``.

        Returns:
            A dict with ``img`` and ``img_meta``, each wrapped in a
            ``DataContainer``.
        """
        info = self.img_infos[idx]

        # Read the raw image from disk.
        image_path = osp.join(self.img_prefix, info['filename'])
        raw_img = mmcv.imread(image_path)

        transformed = self.img_transform(
            img=raw_img,
            scale=self.img_scales[0],
            flip=False,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio,
        )
        img, img_shape, pad_shape, scale_factor = transformed

        img_meta = {
            'ori_shape': (info['height'], info['width'], 3),
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': False,
        }

        return {
            'img': DataContainer(to_tensor(img), stack=True),
            'img_meta': DataContainer(img_meta, cpu_only=True),
        }
Example #4
0
def seq_collate(batch, samples_per_gpu=1):
    """Collate a sequence of samples into tensors/DataContainers.

    Modified from ``mmcv.collate``. Unlike the per-GPU grouping done there,
    every ``DataContainer`` field is merged across the *whole* ``batch``;
    ``samples_per_gpu`` is only used to check that ``len(batch)`` is a
    multiple of it.

    Extends ``default_collate`` with support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data: the ``data`` of all samples is
       returned as a plain list.
    2. cpu_only = False, stack = True, e.g., images tensors: samples are
       padded along their last ``pad_dims`` dimensions (when ``pad_dims`` is
       not ``None``) and stacked with ``default_collate``.
    3. cpu_only = False, stack = False, e.g., gt bboxes: the ``data`` of all
       samples is returned as a plain list.

    Sequences and mappings of the above are collated recursively; anything
    else is handed to ``default_collate``.

    Args:
        batch: Sequence of samples to merge.
        samples_per_gpu: ``len(batch)`` must be divisible by this value when
            the elements are ``DataContainer`` objects.

    Returns:
        A ``DataContainer``, list, dict or whatever ``default_collate``
        returns, mirroring the structure of the elements of ``batch``.

    Raises:
        TypeError: If ``batch`` is not a sequence.
    """
    # Imported locally so the function does not depend on the removed
    # ``collections.Sequence`` / ``collections.Mapping`` aliases (gone since
    # Python 3.10).
    from collections.abc import Mapping, Sequence

    if not isinstance(batch, Sequence):
        # Report the Python type: arbitrary objects have no ``dtype``.
        raise TypeError("{} is not supported.".format(type(batch)))

    first = batch[0]
    if isinstance(first, DataContainer):
        assert len(batch) % samples_per_gpu == 0
        if first.cpu_only:
            stacked = [sample.data for sample in batch]
            return DataContainer(stacked,
                                 first.stack,
                                 first.padding_value,
                                 cpu_only=True)
        elif first.stack:
            assert isinstance(first.data, torch.Tensor)

            pad_dims = first.pad_dims
            if pad_dims is not None:
                ndim = first.dim()
                assert ndim > pad_dims
                # max_shape[k] holds the largest size seen along dim -(k + 1).
                max_shape = [
                    first.size(-dim) for dim in range(1, pad_dims + 1)
                ]
                for sample in batch:
                    # Leading (non-padded) dimensions must agree exactly.
                    for dim in range(0, ndim - pad_dims):
                        assert first.size(dim) == sample.size(dim)
                    for dim in range(1, pad_dims + 1):
                        max_shape[dim - 1] = max(max_shape[dim - 1],
                                                 sample.size(-dim))
                padded_samples = []
                for sample in batch:
                    # F.pad takes (before, after) pairs starting from the
                    # last dimension; only the "after" slots are filled.
                    pad = [0 for _ in range(pad_dims * 2)]
                    for dim in range(1, pad_dims + 1):
                        pad[2 * dim -
                            1] = max_shape[dim - 1] - sample.size(-dim)
                    padded_samples.append(
                        F.pad(sample.data, pad, value=sample.padding_value))
                stacked = default_collate(padded_samples)
            else:
                stacked = default_collate([sample.data for sample in batch])

        else:
            stacked = [sample.data for sample in batch]
        return DataContainer(stacked, first.stack, first.padding_value)
    elif isinstance(first, Sequence):
        transposed = zip(*batch)
        return [
            seq_collate(samples, samples_per_gpu) for samples in transposed
        ]
    elif isinstance(first, Mapping):
        return {
            key: seq_collate([d[key] for d in batch], samples_per_gpu)
            for key in first
        }
    else:
        return default_collate(batch)
Example #5
0
def main():
    """Run the SSD detector on one image and save the visualised result.

    Reads ``IMG_PATH``, preprocesses it according to
    ``IMG_TRANSFORM_CONFIG`` / ``IMG_SCALE``, builds an ``SSDDetector`` on
    CPU, loads ``CHECKPOINT_FILE`` into it, runs a single forward pass in
    test mode and hands the first result to ``show_one_image`` together with
    ``IMG_PATH`` and ``SAVE_PATH``.
    """
    # Read the input image and remember its original size.
    img = mmcv.imread(IMG_PATH)
    img_height, img_width = img.shape[0], img.shape[1]

    # Pre-process the image (no flipping at inference time).
    transform_cfg = IMG_TRANSFORM_CONFIG
    img_transform = ImageTransform(
        mean=transform_cfg['mean'],
        std=transform_cfg['std'],
        to_rgb=transform_cfg['to_rgb'],
    )
    transformed = img_transform(
        img=img,
        scale=IMG_SCALE,
        flip=False,
        pad_val=transform_cfg['pad_values'],
        keep_ratio=transform_cfg['resize_keep_ratio'],
    )
    img, img_shape, pad_shape, scale_factor = transformed

    img_meta = {
        'ori_shape': (img_height, img_width, 3),
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale_factor': scale_factor,
        'flip': False,
    }
    data = {
        'img': DataContainer(to_tensor(img), stack=True),
        'img_meta': DataContainer(img_meta, cpu_only=True),
    }

    # Build the detector.
    model = SSDDetector(
        # basic
        input_size=IMG_SCALE,
        num_classes=NUM_CLASS,
        in_channels=(512, 1024, 512, 256, 256),
        use_dropout=False,
        dropout_rate=None,
        # anchor generation: one ratio list per feature level
        anchor_ratios=(
            [1 / 2.0, 1.0, 2.0],
            [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
            [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
            [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
            [1 / 2.0, 1.0, 2.0],
        ),
        anchor_strides=((16, 16), (16, 16), (30, 30), (60, 60), (100, 100)),
        basesizes=((12, 12), (16, 16), (24, 24), (30, 30), (36, 36)),
        allowed_border=-1,
        # regression
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2),
        # box assignment
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        gt_max_assign_all=False,
        # sampling
        sampling=False,
        # loss balancing
        neg_pos_ratio=3,
        # loss
        smoothl1_beta=1.,
        # inference NMS
        nms_pre=-1,
        score_thr=0.02,
        min_size=100.0,
        max_scale_ratio=10.0,
        nms_cfg=['nms', 0.45, None],
        max_per_img=200,
        # device
        device='cpu',
    )

    # Load the trained weights; the return value is not needed here.
    load_checkpoint(model=model,
                    filename=CHECKPOINT_FILE,
                    map_location='cpu',
                    strict=True,
                    logger=None)

    # Switch to evaluation mode before inference.
    model.eval()

    # Single forward pass on a batch of one image, without gradients.
    with torch.no_grad():
        result = model(is_test=True,
                       rescale=True,
                       img=data['img'].data.unsqueeze(0),
                       img_meta=(data['img_meta'].data, ))

    show_one_image(result[0], IMG_PATH, SAVE_PATH)
def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. ``batch`` is split into consecutive
    groups of ``samples_per_gpu`` samples and each group is collated
    separately. There are 3 cases.

    1. cpu_only = True, e.g., meta data: one list of ``data`` per group.
    2. cpu_only = False, stack = True, e.g., images tensors: 3-D tensors are
       padded on the right/bottom to the largest height/width of their group
       and stacked; tensors of any other rank are stacked as-is with
       ``default_collate``.
    3. cpu_only = False, stack = False, e.g., gt bboxes: one list of ``data``
       per group.

    Sequences and mappings of the above are collated recursively; anything
    else is handed to ``default_collate``.

    Args:
        batch: Sequence of samples to merge.
        samples_per_gpu: Group size; ``len(batch)`` must be divisible by it
            when the elements are ``DataContainer`` objects.

    Returns:
        A ``DataContainer``, list, dict or whatever ``default_collate``
        returns, mirroring the structure of the elements of ``batch``.

    Raises:
        TypeError: If ``batch`` is not a sequence.
    """
    # Imported locally so the function does not depend on the removed
    # ``collections.Sequence`` / ``collections.Mapping`` aliases (gone since
    # Python 3.10).
    from collections.abc import Mapping, Sequence

    if not isinstance(batch, Sequence):
        # Report the Python type: arbitrary objects have no ``dtype``.
        raise TypeError("{} is not supported.".format(type(batch)))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(stacked,
                                 batch[0].stack,
                                 batch[0].padding_value,
                                 cpu_only=True)
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                group = batch[i:i + samples_per_gpu]
                assert isinstance(batch[i].data, torch.Tensor)
                if batch[i].dim() == 3:
                    # Find the largest h and w within this group. Seed from
                    # the group's own first sample so that one group's size
                    # does not inflate the padding of another.
                    c, h, w = batch[i].size()
                    for sample in group:
                        assert c == sample.size(0)
                        h = max(h, sample.size(1))
                        w = max(w, sample.size(2))
                    padded_samples = [
                        F.pad(
                            sample.data,
                            # pad on the right and at the bottom only
                            (0, w - sample.size(2), 0, h - sample.size(1)),
                            value=sample.padding_value) for sample in group
                    ]
                    stacked.append(default_collate(padded_samples))
                else:
                    # Non-3D tensors are stacked without padding; previously
                    # only 1-D tensors were handled and other ranks were
                    # silently dropped from the result.
                    stacked.append(
                        default_collate([sample.data for sample in group]))

        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
    def prepare_train_img(self, idx):
        """Assemble the detection training sample stored at position ``idx``.

        The image is optionally passed through ``self.extra_aug``, randomly
        flipped and rescaled, and its boxes are transformed with the same
        flip/scale parameters. Labels are converted to 64-bit integers before
        being turned into a tensor.

        Args:
            idx: Index into ``self.img_infos``.

        Returns:
            ``None`` when the annotation contains no ground-truth boxes,
            otherwise a dict with ``img``, ``img_meta``, ``gt_bboxes`` and
            ``gt_labels``, each wrapped in a ``DataContainer``.
        """
        img_info = self.img_infos[idx]

        ann = self.get_ann_info(idx)
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']

        # skip the image if there is no valid gt bbox; checked before reading
        # the file so that discarded samples cost no image decoding
        if len(gt_bboxes) == 0:
            return None

        # load image
        img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

        # aug 1: apply extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = \
                self.extra_aug(img, gt_bboxes, gt_labels)

        # aug 2: apply ordinary augmentations: flipping
        flip = bool(np.random.rand() < self.flip_ratio)

        # aug 3: apply ordinary augmentations: scaling
        img_scale = random_scale(self.img_scales, self.multiscale_mode)

        img, img_shape, pad_shape, scale_factor = \
            self.img_transform(
                img=img,
                scale=img_scale,
                flip=flip,
                pad_val=self.pad_values,
                keep_ratio=self.resize_keep_ratio,
            )

        img = img.copy()
        # boxes must follow the same geometry as the image
        gt_bboxes = self.bbox_transform(
            bboxes=gt_bboxes,
            img_shape=img_shape,
            scale_factor=scale_factor,
            flip=flip,
        )

        img_meta = dict(
            ori_shape=(img_info['height'], img_info['width'], 3),
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip,
        )

        # ``np.long`` was removed in NumPy 1.24; ``np.int64`` is the explicit,
        # platform-independent spelling.
        data = dict(
            img=DataContainer(to_tensor(img), stack=True),
            img_meta=DataContainer(data=img_meta, cpu_only=True),
            gt_bboxes=DataContainer(data=to_tensor(gt_bboxes)),
            gt_labels=DataContainer(to_tensor(gt_labels.astype(np.int64))),
        )

        return data