Example #1
    def test_augmentation_list(self):
        input_shape = (100, 100)
        image = np.random.rand(*input_shape).astype("float32")
        sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
        inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args

        augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
        _ = T.AugmentationList([augs, T.Resize(30)])(inputs)
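A minimal sketch of what the test above exercises, using illustrative variable names: calling the AugmentationList mutates the AugInput in place and returns a TransformList that can be re-applied to other data such as boxes.

import numpy as np
import detectron2.data.transforms as T

image = np.random.rand(100, 100).astype("float32")
sem_seg = (np.random.rand(100, 100) < 0.5).astype("uint8")
inputs = T.AugInput(image, sem_seg=sem_seg)

tfl = T.AugmentationList([T.RandomFlip(), T.Resize(20)])(inputs)
new_image, new_sem_seg = inputs.image, inputs.sem_seg  # transformed in place
boxes = np.array([[10, 10, 50, 60]], dtype="float32")  # XYXY
new_boxes = tfl.apply_box(boxes)  # the same transforms, re-applied to boxes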
Example #2
    def __init__(
        self,
        is_train: bool,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        # Extra data augmentation for point supervision
        sample_points: int = 0,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether it's used in training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            sample_points: subsample points at each iteration
        """
        # fmt: off
        self.is_train               = is_train
        self.augmentations          = T.AugmentationList(augmentations)
        self.image_format           = image_format
        self.sample_points          = sample_points
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
        logger.info(f"Point Augmentations used in {mode}: sample {sample_points} points")
Example #3
    def test_print_augmentation(self):
        t = T.RandomCrop("relative", (100, 100))
        self.assertEqual(str(t), "RandomCrop(crop_type='relative', crop_size=(100, 100))")

        t0 = T.RandomFlip(prob=0.5)
        self.assertEqual(str(t0), "RandomFlip(prob=0.5)")

        t1 = T.RandomFlip()
        self.assertEqual(str(t1), "RandomFlip()")

        t = T.AugmentationList([t0, t1])
        self.assertEqual(str(t), f"AugmentationList[{t0}, {t1}]")
Example #4
    def __init__(
            self,
            is_train: bool,
            augmentations: List[Union[T.Augmentation, T.Transform]],
            image_format: str):

        # fmt: off
        self.is_train = is_train
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
Example #5
    def __init__(self,
                 is_train: bool,
                 *,
                 augmentations: List[Union[T.Augmentation, T.Transform]],
                 image_format: str,
                 mosaic_trans: Optional[CfgNode],
                 use_instance_mask: bool = False,
                 use_keypoint: bool = False,
                 instance_mask_format: str = "polygon",
                 recompute_boxes: bool = False,
                 add_meta_infos: bool = False):
        """
        Args:
            augmentations: a list of augmentations or deterministic
                transforms to apply
            image_format: an image format supported by
                :func:`detection_utils.read_image`.
            mosaic_trans: a CfgNode for Mosaic transformation.
            use_instance_mask: whether to process instance segmentation
                annotations, if available
            use_keypoint: whether to process keypoint annotations if available
            instance_mask_format: one of "polygon" or "bitmask". Process
                instance segmentation masks into this format.
            recompute_boxes: whether to overwrite bounding box annotations
                by computing tight bounding boxes from instance mask
                annotations.
            add_meta_infos: whether to add `meta_infos` field
        """
        if recompute_boxes:
            assert use_instance_mask, "recompute_boxes requires instance masks"
        # fmt: off
        self.is_train = is_train
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format
        self.use_instance_mask = use_instance_mask
        self.instance_mask_format = instance_mask_format
        self.use_keypoint = use_keypoint
        self.recompute_boxes = recompute_boxes
        self.add_meta_infos = add_meta_infos
        # fmt: on
        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(
            f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")

        self.mosaic_trans = mosaic_trans
        # mosaic_trans is declared Optional, so guard against None before reading its keys
        if self.mosaic_trans is not None and self.mosaic_trans.ENABLED:
            self.mosaic_pool = deque(maxlen=self.mosaic_trans.POOL_CAPACITY)
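The constructor above only reads the ENABLED and POOL_CAPACITY keys of mosaic_trans, so a minimal config sketch could look as follows (any further Mosaic parameters the full mapper may read elsewhere are not shown).

from detectron2.config import CfgNode as CN

mosaic_trans = CN()
mosaic_trans.ENABLED = True
mosaic_trans.POOL_CAPACITY = 1000  # how many recent samples the mosaic deque keeps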
Example #6
    def __init__(self, cfg, is_train: bool = True):
        aug_kwargs = cfg.aug_kwargs
        aug_list = [
            # T.Resize((800, 800)),
        ]
        if is_train:
            aug_list.extend([
                getattr(T, name)(**kwargs)
                for name, kwargs in aug_kwargs.items()
            ])
        self.augmentations = T.AugmentationList(aug_list)
        self.is_train = is_train

        mode = "training" if is_train else "inference"
        print(
            f"[MyDatasetMapper] Augmentations used in {mode}: {self.augmentations}"
        )
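The loop above instantiates each augmentation as getattr(T, name)(**kwargs), so cfg.aug_kwargs is expected to behave like a mapping from class names in detectron2.data.transforms to constructor keyword arguments. A hypothetical value (the specific choices are illustrative):

aug_kwargs = {
    "RandomBrightness": {"intensity_min": 0.9, "intensity_max": 1.1},
    "RandomFlip": {"prob": 0.5},
    "ResizeShortestEdge": {"short_edge_length": [640, 672, 704],
                           "max_size": 1333,
                           "sample_style": "choice"},
}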
Example #7
    def __init__(
        self,
        is_train: bool,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        use_instance_mask: bool = False,
        use_keypoint: bool = False,
        instance_mask_format: str = "polygon",
        keypoint_hflip_indices: Optional[np.ndarray] = None,
        precomputed_proposal_topk: Optional[int] = None,
        recompute_boxes: bool = False
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: whether it's used in training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            use_instance_mask: whether to process instance segmentation annotations, if available
            use_keypoint: whether to process keypoint annotations if available
            instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
                masks into this format.
            keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
            precomputed_proposal_topk: if given, will load pre-computed
                proposals from dataset_dict and keep the top k proposals for each image.
            recompute_boxes: whether to overwrite bounding box annotations
                by computing tight bounding boxes from instance mask annotations.
        """
        if recompute_boxes:
            assert use_instance_mask, "recompute_boxes requires instance masks"
        # fmt: off
        self.is_train               = is_train
        self.augmentations          = T.AugmentationList(augmentations)
        self.image_format           = image_format
        self.use_instance_mask      = use_instance_mask
        self.instance_mask_format   = instance_mask_format
        self.use_keypoint           = use_keypoint
        self.keypoint_hflip_indices = keypoint_hflip_indices
        self.proposal_topk          = precomputed_proposal_topk
        self.recompute_boxes        = recompute_boxes
        # fmt: on
        logger = logging.getLogger(__name__)
        logger.info("Augmentations used in training: " + str(augmentations))
Example #8
    def __init__(self,
                 project,
                 fileServer,
                 augmentations,
                 is_train,
                 image_format='BGR',
                 instance_mask_format='bitmask',
                 recompute_boxes=True,
                 classIndexMap=None):
        super(DatasetMapper, self).__init__()
        self.project = project
        self.fileServer = fileServer
        self.augmentations = augmentations
        if not isinstance(self.augmentations, T.AugmentationList):
            self.augmentations = T.AugmentationList(self.augmentations)
        self.is_train = is_train
        self.image_format = image_format
        self.instance_mask_format = instance_mask_format
        self.recompute_boxes = recompute_boxes
        self.classIndexMap = classIndexMap  # used to map e.g. segmentation index values from AIDE to the model
        self.keypoint_hflip_indices = None  # TODO
Example #9
    def __init__(
        self,
        *,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        panoptic_target_generator: Callable,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            panoptic_target_generator: a callable that takes "panoptic_seg" and
                "segments_info" to generate training targets for the model.
        """
        # fmt: off
        self.augmentations = T.AugmentationList(augmentations)
        self.image_format = image_format
        # fmt: on
        logger = logging.getLogger(__name__)
        logger.info("Augmentations used in training: " + str(augmentations))

        self.panoptic_target_generator = panoptic_target_generator
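The docstring only constrains panoptic_target_generator to be a callable over "panoptic_seg" and "segments_info" that produces training targets; a hypothetical stand-in (the real generator in Panoptic-DeepLab builds semantic, center, and offset targets) might look like this:

def panoptic_target_generator(panoptic_seg, segments_info):
    # Return a dict of training targets; the mapper presumably merges these
    # keys into the dataset dict. This stub merely echoes the id map.
    return {"sem_seg": panoptic_seg}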
Example #10
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open-Image-Dataset
        if 'get_detection_dataset_dicts':  # non-empty string, always true; used only as a section label
            all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")
        if 'rebalancing':  # always true; marks the rebalancing step
            image_id_vs_idx = {}
            for idx, desc in enumerate(all_descs_train):
                image_id_vs_idx[desc['image_id']] = idx
            descs_train = list(map(lambda img_id: all_descs_train[image_id_vs_idx[img_id]], sample_image_ids()))
            print('_' * 50 + f'train dataset len: {len(descs_train)}')

        ds_train = DatasetFromList(descs_train, copy=False)

        if 'DatasetMapper':  # always true; marks the mapper construction step
            augs = [RandomContrast(0.8, 1.2),
                    RandomBrightness(0.8, 1.2),
                    RandomSaturation(0.8, 1.2)]
            augs.extend(build_augmentation(cfg, is_train=True))
            mapper = make_mapper('oid_train', is_train=True, augmentations=T.AugmentationList(augs))
    ds_train = MapDataset(ds_train, mapper)

    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
Example #11
from detectron2.utils.visualizer import Visualizer, ColorMode
import detectron2.data.transforms as T
import numpy as np

import torch
from utils.trainer import InvertColors

augs = T.AugmentationList(
    [
        InvertColors(),
        T.Resize((300,800)), 
#         T.RandomContrast(1.5, 2.5),
        T.PadTransform(100, 100, 100, 100),
    ]
)
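InvertColors comes from the project's utils.trainer and is not shown here; a hypothetical implementation that would be compatible with AugmentationList could subclass T.Augmentation, for example:

class InvertColors(T.Augmentation):
    """Sketch only; the real utils.trainer.InvertColors may differ."""

    def get_transform(self, image):
        # ColorTransform applies the callable to pixel values only,
        # leaving geometry (boxes, masks, coordinates) untouched.
        return T.ColorTransform(lambda x: 255 - x)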


def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    
    return x

def sort_predictions(outputs):
    pred_classes = []
    scores = []
    for out in outputs:
        idxs = np.argsort(out["instances"].pred_boxes.tensor.to('cpu')[:,0])
        pred_classes.append(out["instances"].pred_classes[idxs])
        scores.append(out["instances"].scores[idxs])

    return pred_classes, scores
Example #12
from detectron2.utils.visualizer import Visualizer, ColorMode
import detectron2.data.transforms as T
import numpy as np

import torch
from utils.trainer import InvertColors

augs = T.AugmentationList([
    T.Resize((600, 400)),
    T.RandomContrast(1.5, 2.5),
    T.PadTransform(100, 100, 100, 100),
])


def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    return x


def sort_predictions(outputs):
    pred_classes = []
    scores = []
    for out in outputs:
        idxs = np.argsort(out["instances"].pred_boxes.tensor.to('cpu')[:, 0])
        pred_classes.append(out["instances"].pred_classes[idxs])
        scores.append(out["instances"].scores[idxs])

    return pred_classes, scores
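A small usage sketch for augment above; the image path is illustrative.

import cv2

im = cv2.imread("example.jpg")  # HWC, BGR, uint8
augmented = augment(im)         # resized, contrast-adjusted, padded
print(im.shape, "->", augmented.shape)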
Example #13
import json
import os
import random
import cv2
import numpy as np
import torch
import detectron2.data.transforms as transforms
from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.engine import DefaultTrainer
from detectron2.utils.visualizer import Visualizer

print(torch.__version__)
import Params as P

USE_SAVED_MODEL = True
SHOW_INPUTS = False

augs = transforms.AugmentationList([
    transforms.RandomBrightness(0.5, 1.5),
    transforms.RandomContrast(0.5, 1.5),
    transforms.RandomSaturation(0.5, 1.5),
    transforms.RandomFlip(prob=0.5),
    transforms.RandomExtent(scale_range=(0.1, 3), shift_range=(0.5, 0.5)),
    transforms.Resize(P.CNN_INPUT_SHAPE)
])


class Trainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        mapper = DatasetMapper(cfg, is_train=True, augmentations=augs)
        return build_detection_train_loader(cfg, mapper=mapper)


for d in ["train"]:  #, "valid"
    with open(P.DATASET_DIR + d + "/labels.json", 'r') as fp:
        dataset_dicts = json.load(fp)
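A hedged sketch of how this Trainer would typically be driven; cfg construction is project-specific and not shown, and reusing USE_SAVED_MODEL as the resume flag is only an assumption about its intent.

trainer = Trainer(cfg)
trainer.resume_or_load(resume=USE_SAVED_MODEL)  # assumption: the flag controls resuming from a saved model
trainer.train()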
Example #14
    def __call__(self, dataset_dict):
        """Loads image & attributes into the dict, returns a pair - for the original and the flipped ones.

        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
            See full list of keys here: https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html

        Returns:
            tuple(dict, dict): a tuple where the first dict contains the data for the image augmented in a
            default way, and the second dict contains the same image but x-flipped

        Most of the code comes from the original `__call__`; the only difference is in the last few lines.
        There, the list of instantiated transforms is extended with an additional x-flip and applied
        to the image. Note that the resulting transform list may contain two x-flips
        (which is effectively no flip), so one might argue we could simply keep the original image untouched
        and flip its copy. However, we keep things as they are because only the original image (in the first
        dict) is used for supervised training, while the x-flipped image is used only for the CSD loss. If
        the original image were never x-flipped, the model would effectively never be trained on x-flipped
        images.
        """

        # Load the image (D2's original code)
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        def apply_image_augmentations(image, dataset_dict, sem_seg_gt,
                                      augmentations):
            """Applies given augmentation to the given image and its attributes (segm, instances, etc).

            Almost no changes from D2's original code (apart from erasing non-relevant portions, e.g. for
            keypoints), just wrapped it in a function to avoid duplicate code."""

            aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
            transforms = augmentations(aug_input)
            image, sem_seg_gt = aug_input.image, aug_input.sem_seg

            image_shape = image.shape[:2]  # h, w
            # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
            # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
            # Therefore it's important to use torch.Tensor.
            dataset_dict["image"] = torch.as_tensor(
                np.ascontiguousarray(image.transpose(2, 0, 1)))
            if sem_seg_gt is not None:
                dataset_dict["sem_seg"] = torch.as_tensor(
                    sem_seg_gt.astype("long"))

            if not self.is_train:
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                return dataset_dict

            if "annotations" in dataset_dict:
                for anno in dataset_dict["annotations"]:
                    if not self.use_instance_mask:
                        anno.pop("segmentation", None)
                    if not self.use_keypoint:
                        anno.pop("keypoints", None)

                annos = [
                    utils.transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices,
                    ) for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
                instances = utils.annotations_to_instances(
                    annos, image_shape, mask_format=self.instance_mask_format)

                # After transforms such as cropping are applied, the bounding box may no longer
                # tightly bound the object. As an example, imagine a triangle object
                # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
                # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
                # the intersection of original bounding box and the cropping box.
                if self.recompute_boxes:
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes(
                    )
                dataset_dict["instances"] = utils.filter_empty_instances(
                    instances)

            return dataset_dict, transforms

        # Store the copies of image and its metadata for the future x-flip
        dataset_dict_flipped, image_flipped, sem_seg_gt_flipped = (
            dataset_dict.copy(),
            image.copy(),
            sem_seg_gt.copy() if sem_seg_gt is not None else None,  # "is not None" avoids the ambiguous truth value of an array
        )

        # Augment the original image
        original_dataset_dict, transforms = apply_image_augmentations(
            image, dataset_dict, sem_seg_gt, self.augmentations)

        # Extend the instantiated transforms with an additional x-flip at the end; see `TransformList.__add__`
        transforms_w_flip = transforms + T.HFlipTransform(image.shape[1])
        # Transform Transforms to Augmentations; to learn more on how they differ you can check my note here:
        # https://www.notion.so/vlfom/How-augmentations-work-in-DatasetMapper-a4832df03489429ba04b9bc8d0e12dc6
        augs_w_flip = T.AugmentationList(transforms_w_flip)
        # Obtain the x-flipped data
        flipped_dataset_dict, _ = apply_image_augmentations(
            image_flipped, dataset_dict_flipped, sem_seg_gt_flipped,
            augs_w_flip)

        return (original_dataset_dict, flipped_dataset_dict)
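Downstream, a CSD-style training step would presumably unpack the pair returned by this mapper; a minimal sketch with illustrative names:

original, flipped = mapper(dataset_dict)
# `original` feeds the usual supervised losses; `flipped` is only used to
# enforce consistency (the CSD loss) between predictions on the two views.
supervised_image = original["image"]
consistency_image = flipped["image"]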