Example #1
def get_test_transforms():
    return A.Compose([
        A.Resize(Config.Train.img_size, Config.Train.img_size, p=1.0),
        ToTensorV2(p=1.0)
    ],
                     bbox_params=BboxParams('pascal_voc',
                                            label_fields=['labels']))
Example #2
def get_train_transforms():
    # noinspection PyTypeChecker
    return A.Compose([
        A.RandomSizedCrop(
            min_max_height=(850, 850), height=1024, width=1024, p=0.3),
        A.OneOf([
            A.HueSaturationValue(hue_shift_limit=0.2,
                                 sat_shift_limit=0.2,
                                 val_shift_limit=0.2,
                                 p=0.8),
            A.RandomBrightnessContrast(
                brightness_limit=0.2, contrast_limit=0.2, p=0.9)
        ],
                p=0.5),
        A.OneOf([
            A.RandomRain(rain_type='drizzle', p=0.2),
            A.GaussianBlur(blur_limit=7, p=0.5),
            A.GaussNoise((0.2, 0.25), p=0.3),
            A.RandomShadow(p=0.2)
        ],
                p=0.4),
        A.ToGray(p=0.01),
        A.Flip(p=0.5),
        A.CoarseDropout(max_height=64,
                        max_width=64,
                        min_holes=3,
                        min_height=32,
                        min_width=32,
                        p=0.5),
        A.Resize(Config.Train.img_size, Config.Train.img_size, p=1.0),
        ToTensorV2(p=1.0),
    ],
                     bbox_params=BboxParams('pascal_voc',
                                            label_fields=['labels'],
                                            min_visibility=0.0))
Example #3
def get_aug(aug, min_area=0., min_visibility=0.):
    return Compose(aug,
                   bbox_params=BboxParams(format='pascal_voc',
                                          min_area=min_area,
                                          min_visibility=min_visibility,
                                          label_fields=[
                                              'category_id'
                                           ]))  # the format here must also be adjusted to match the bbox format
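As the comment notes, the format argument must match how the boxes are stored. For reference, a small sketch of the same box expressed in each coordinate convention Albumentations accepts (the numbers are made up for a 640x480 image):

# the same 200x200-pixel box with top-left corner (100, 50) on a 640x480 image
box_pascal_voc     = [100, 50, 300, 250]               # [x_min, y_min, x_max, y_max] in pixels
box_coco           = [100, 50, 200, 200]               # [x_min, y_min, width, height] in pixels
box_yolo           = [0.3125, 0.3125, 0.3125, 0.4167]  # [x_center, y_center, width, height], normalized
box_albumentations = [0.1562, 0.1042, 0.4688, 0.5208]  # [x_min, y_min, x_max, y_max], normalized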
Example #4
def build_transforms(cfg, mode='train', norm_image=True):
    assert mode in ['train', 'test', 'val']
    min_size = cfg.SCALES[0]
    max_size = cfg.SCALES[1]
    assert min_size <= max_size

    if mode == 'train':
        flip_prob = cfg.TRAIN.FLIP_PROB
    elif mode == 'test':
        flip_prob = cfg.TEST.FLIP_PROB
    else:
        flip_prob = cfg.VAL.FLIP_PROB

    to_bgr255 = True

    normalize_transform = T.Normalize(
        mean=cfg.NETWORK.PIXEL_MEANS, std=cfg.NETWORK.PIXEL_STDS, to_bgr255=to_bgr255
    )

    # transform = T.Compose(
    #     [
    #         T.Resize(min_size, max_size),
    #         T.RandomHorizontalFlip(flip_prob),
    #         T.ToTensor(),
    #         normalize_transform,
    #         T.FixPadding(min_size, max_size, pad=0)
    #     ]
    # )
    bbox_params = BboxParams(
        format='pascal_voc',
        min_area=0, 
        min_visibility=0.2,
        label_fields=['fake_label'])
    album_augs = [
        HorizontalFlip(p=0.5),
        # RandomBrightness(limit=0.2, p=0.5),
        # RandomContrast(limit=0.2, p=0.5),
        RandomScale(scale_limit=(-0.3, 0.0), p=0.3),
        # MedianBlur(blur_limit=5, p=0.3),
        # Rotate(limit=30, p=0.25),
    ]
    album_augs = Compose(album_augs, bbox_params=bbox_params)

    if mode == 'train':
        all_augs = [
            T.Resize(min_size, max_size),
            T.ToTensor(),
            album_augs,
        ]
    else:
        all_augs = [
            T.Resize(min_size, max_size),
            T.ToTensor(),
        ]
    if norm_image:
        all_augs.append(normalize_transform)
    transform = T.Compose(all_augs)
    return transform
Example #5
def light_aug_detection(p=1.0):
    return Compose([
        Flip(p=1),
        Transpose(p=0.8),
        ShiftScaleRotate(shift_limit=0.01, scale_limit=0.01, rotate_limit=30, p=1),
        RandomBrightnessContrast(brightness_limit=(-0.01, 0.01), contrast_limit=(-0.01, 0.01), p=p),
    ],
        bbox_params=BboxParams(format='pascal_voc', label_fields=['category_ids']),
        p=p)
Example #6
    def __init__(self,
                 data_dir_GT,
                 data_dir_LQ,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_workers=1,
                 training=True):
        # data transformation
        # According to this link: https://discuss.pytorch.org/t/normalization-of-input-image/34814/8
        # for satellite imagery 0.5 is fine; otherwise calculate mean and std over the whole dataset
        # (a sketch of one way to do that follows this example)
        # calculated mean and std using method from util
        '''
        Data transform for GAN training
        '''
        data_transforms_train = Compose(
            [
                HorizontalFlip(),
                Normalize(  #mean std for potsdam dataset from COWC [Calculate also for spot6]
                    mean=[0.3442, 0.3708, 0.3476],
                    std=[0.1232, 0.1230, 0.1284])
            ],
            additional_targets={'image_lq': 'image'},
            bbox_params=BboxParams(format='pascal_voc',
                                   min_area=0,
                                   min_visibility=0,
                                   label_fields=['labels']))

        data_transforms_test = Compose(
            [
                Normalize(  #mean std for potsdam dataset from COWC [Calculate also for spot6]
                    mean=[0.3442, 0.3708, 0.3476],
                    std=[0.1232, 0.1230, 0.1284])
            ],
            additional_targets={'image_lq': 'image'})

        self.data_dir_gt = data_dir_GT
        self.data_dir_lq = data_dir_LQ

        if training:
            self.dataset = COWCGANDataset(self.data_dir_gt,
                                          self.data_dir_lq,
                                          transform=data_transforms_train)
        else:
            self.dataset = COWCGANDataset(self.data_dir_gt,
                                          self.data_dir_lq,
                                          transform=data_transforms_test)
        self.length = len(self.dataset)
        super().__init__(self.dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         collate_fn=collate_fn)
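A sketch of one way to compute the per-channel mean and std referenced in the comments above (illustrative only; channel_mean_std and the image-loading convention are assumptions, not part of the repository):

import numpy as np

def channel_mean_std(image_paths, load_image):
    """Accumulate per-channel statistics over every pixel of every image."""
    pixel_sum = np.zeros(3)
    pixel_sq_sum = np.zeros(3)
    n_pixels = 0
    for path in image_paths:
        img = load_image(path).astype(np.float64) / 255.0   # HWC array scaled to [0, 1]
        flat = img.reshape(-1, 3)
        pixel_sum += flat.sum(axis=0)
        pixel_sq_sum += (flat ** 2).sum(axis=0)
        n_pixels += flat.shape[0]
    mean = pixel_sum / n_pixels
    std = np.sqrt(pixel_sq_sum / n_pixels - mean ** 2)
    return mean, std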
Example #7
 def test_input(self):
     safecrop = RandomSizedBBoxSafeCrop(image_in='x',
                                        height=self.height,
                                        width=self.width,
                                        bbox_in="x_bbox",
                                        bbox_params=BboxParams("coco"))
     output = safecrop.forward(data=self.single_input, state={})
     with self.subTest('Check output type'):
         self.assertEqual(type(output), list)
     with self.subTest('Check output image shape'):
         self.assertEqual(output[0].shape, self.single_output_shape)
Example #8
    def __getitem__(self, idx):
        img = Image.open(self._images_path[idx])
        img = np.array(img)

        # read keypoints
        # type - x - y
        keypoints = []
        with open(self._keypoints_path[idx], 'r') as f:
            # the annotation file holds class, x and y on three separate lines
            c, x, y = f.read().strip().split('\n')
            keypoints.append(
                [self._opt['classes mapping'][c], float(x), float(y)])

        # coco format -> yolo format
        bbox_size = 32  # configurable parameter
        h, w = img.shape[:2]
        bboxes = [[kp[1], kp[2], bbox_size, bbox_size] for kp in keypoints]
        normalized_bboxes = [[
            bbox[0] / w, bbox[1] / h, bbox[2] / w, bbox[3] / h
        ] for bbox in bboxes]

        old_h, old_w = img.shape[:2]
        new_h, new_w = old_h, old_w

        # build the per-sample pipeline on a copy so transforms don't
        # accumulate in self._transforms across __getitem__ calls
        transforms = list(self._transforms)

        if old_w % 32 != 0:
            new_w = old_w + (32 - old_w % 32)
            ratio = new_w / old_w
            new_h = int(old_h * ratio)
            transforms.append(Resize(new_h, new_w))

        if new_h % 32 != 0:
            new_h = new_h + (32 - new_h % 32)
            transforms.append(PadIfNeeded(new_h, new_w))

        transforms.append(Normalize(p=1.0))

        aug = Compose(transforms,
                      bbox_params=BboxParams(format='yolo',
                                             label_fields=['category_id']))
        res = aug(image=img,
                  bboxes=normalized_bboxes,
                  category_id=[kp[0] for kp in keypoints])

        img = res['image']
        bboxes = res['bboxes']
        classes_id = res['category_id']

        img = img.transpose(2, 0, 1)

        labels = [[0, classes_id[i], bbox[0], bbox[1], bbox[2], bbox[3]]
                  for i, bbox in enumerate(bboxes)]

        return torch.from_numpy(img), torch.tensor(
            labels), self._images_path[idx], None
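A tiny worked check of the yolo conversion above (illustrative numbers, and assuming the keypoint is taken as the box center, which is what the yolo format expects): a 32x32 box centered at keypoint (200, 100) in a 640x480 image becomes [200/640, 100/480, 32/640, 32/480] = [0.3125, 0.2083, 0.05, 0.0667], i.e. [x_center, y_center, width, height] normalized by image width and height.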
Example #9
def make_transforms(dict):
    if dict is None:
        return None
    transforms = []
    if 'BboxParams' in dict:
        bbox_args = dict['BboxParams']
        del dict['BboxParams']
    else:
        bbox_args = {'format': 'pascal_voc', 'label_fields': ['labels']}
    for name, kwargs in dict.items():
        transforms.append(instantiate(name, kwargs))
    return Compose(transforms=transforms, bbox_params=BboxParams(**bbox_args))
Example #10
def get_aug(min_area=0., min_visibility=0.):
    return Compose(
        [OneOf([
            RandomContrast(p=0.1, limit=(-0.5, 1)),  # -0.5 ~ 2 -- RandomBrightnessContrast
            RandomBrightness(p=0.05, limit=(-0.2, 0.1)),
            HorizontalFlip(p=0.0),
        ], p=0.5)],
        bbox_params=BboxParams(format='pascal_voc', min_area=min_area,
                               min_visibility=min_visibility, label_fields=['category_id'])
    )
Example #11
 def get_aug(self, aug, min_area=0., min_visibility=0.3):
     """
     Args:
     aug - list of albumentations augmentations
     min_area - minimum bbox area (in pixels) required to keep a bbox
     min_visibility - minimum fraction of the original bbox area that must
                      remain visible after the transform to keep the bbox
     """
     return Compose(aug,
                    bbox_params=BboxParams(format='coco',
                                           min_area=min_area,
                                           min_visibility=min_visibility,
                                           label_fields=['category_id']))
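A usage sketch for this helper (dataset, the crop size, and the box values below are placeholders, not taken from this project). With format='coco' the boxes are [x_min, y_min, width, height] in pixels, and boxes that end up smaller than min_area or less visible than min_visibility after the transform are dropped together with their category_id entry:

import numpy as np
from albumentations import HorizontalFlip, RandomCrop

aug = dataset.get_aug([RandomCrop(256, 256), HorizontalFlip(p=0.5)])
sample = aug(image=np.zeros((512, 512, 3), dtype=np.uint8),
             bboxes=[[10, 20, 100, 150]],      # coco: [x_min, y_min, width, height]
             category_id=[3])
kept_boxes, kept_ids = sample['bboxes'], sample['category_id']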
Example #12
 def __init__(self,
              func: DualTransform,
              mode: Union[None, str, Iterable[str]] = None,
              ds_id: Union[None, str, Iterable[str]] = None,
              image_in: Optional[str] = None,
              mask_in: Optional[str] = None,
              masks_in: Optional[str] = None,
              bbox_in: Optional[str] = None,
              keypoints_in: Optional[str] = None,
              image_out: Optional[str] = None,
              mask_out: Optional[str] = None,
              masks_out: Optional[str] = None,
              bbox_out: Optional[str] = None,
              keypoints_out: Optional[str] = None,
              bbox_params: Union[BboxParams, str, None] = None,
              keypoint_params: Union[KeypointParams, str, None] = None,
              extra_in_keys: Optional[Dict[str, str]] = None,
              extra_out_keys: Optional[Dict[str, str]] = None):
     assert any((image_in, mask_in, masks_in, bbox_in,
                 keypoints_in)), "At least one input must be non-None"
     image_out = image_out or image_in
     mask_out = mask_out or mask_in
     masks_out = masks_out or masks_in
     bbox_out = bbox_out or bbox_in
     keypoints_out = keypoints_out or keypoints_in
     keys = OrderedDict([("image", image_in), ("mask", mask_in),
                         ("masks", masks_in), ("bboxes", bbox_in),
                         ("keypoints", keypoints_in)])
     if extra_in_keys:
         keys.update(extra_in_keys)
     self.keys_in = OrderedDict([(k, v) for k, v in keys.items()
                                 if v is not None])
     keys = OrderedDict([("image", image_out), ("mask", mask_out),
                         ("masks", masks_out), ("bboxes", bbox_out),
                         ("keypoints", keypoints_out)])
     if extra_out_keys:
         keys.update(extra_out_keys)
     self.keys_out = OrderedDict([(k, v) for k, v in keys.items()
                                  if v is not None])
     super().__init__(inputs=list(self.keys_in.values()),
                      outputs=list(self.keys_out.values()),
                      mode=mode,
                      ds_id=ds_id)
     if isinstance(bbox_params, str):
         bbox_params = BboxParams(bbox_params)
     if isinstance(keypoint_params, str):
         keypoint_params = KeypointParams(keypoint_params)
     self.func = Compose(transforms=[func],
                         bbox_params=bbox_params,
                         keypoint_params=keypoint_params)
Example #13
 def __init__(self, box_format='coco'):
     self.tsfm = Compose(
         [
             HorizontalFlip(),
             # RandomResizedCrop(512, 512, scale=(0.75, 1)),
             RandomBrightnessContrast(0.4, 0.4),
             GaussNoise(),
             RGBShift(),
             CLAHE(),
             RandomGamma()
         ],
         bbox_params=BboxParams(format=box_format,
                                min_visibility=0.75,
                                label_fields=['labels']))
Example #14
def train_multi_augment12(image, bboxes=None, category_id=None):
    h, w = image.shape[0], image.shape[1]
    if bboxes is not None:
        aug = Compose(
            [
                HorizontalFlip(p=0.5),
                ShiftScaleRotate(
                    shift_limit=0.05,
                    scale_limit=0.05,
                    rotate_limit=5,
                    border_mode=cv2.BORDER_REPLICATE,
                    p=1,
                ),
                RandomSizedCrop(min_max_height=(int(h * 0.9), h),
                                height=h,
                                width=w,
                                p=0.25),
                RandomBrightnessContrast(
                    brightness_limit=0.0, contrast_limit=0.3, p=0.25),
            ],
            p=1,
            bbox_params=BboxParams(format="pascal_voc",
                                   label_fields=["category_id"]),
        )
        augmented = aug(image=image, bboxes=bboxes, category_id=category_id)

    else:  # Normal
        aug = Compose(
            [
                HorizontalFlip(p=0.5),
                ShiftScaleRotate(
                    shift_limit=0.05,
                    scale_limit=0.05,
                    rotate_limit=5,
                    border_mode=cv2.BORDER_REPLICATE,
                    p=1,
                ),
                RandomSizedCrop(min_max_height=(int(h * 0.9), h),
                                height=h,
                                width=w,
                                p=0.25),
                RandomBrightnessContrast(
                    brightness_limit=0.3, contrast_limit=0.3, p=0.25),
            ],
            p=1,
        )
        augmented = aug(image=image)

    return augmented
Example #15
    def transform_val(self, sample):
        """ data augmentation / preprocessing for validation """
        img = sample["image"]
        bboxes = sample["bboxes"]

        imgH = img.shape[0]
        imgW = img.shape[1]

        if imgW / imgH < 2.5:
            scale_factor = min(self.args.img_size[0] / imgH,
                               self.args.img_size[1] / imgW)
        else:
            scale_factor = 1.0

        random_scale = np.random.randint(8, 11) / 10

        if bboxes.size == 0:
            bboxes = np.array([[
                0.1, 0.1, 0.1, 0.1, 0.0, 0.0
            ]])  # this is just a dummy - all values must be inside (0,1)

        annotations = {'image': img, 'bboxes': bboxes}

        transforms = (
            [  #Resize(height=int(scale_factor * imgH), width=int(scale_factor * imgW),
                #      p=1.0),
                # PadIfNeeded(min_height=self.args.img_size[0], min_width=self.args.img_size[1],
                #             border_mode=cv2.BORDER_REPLICATE,
                #             p=1.0),
                # changing image size - maintaining aspect ratio for later resize
                # OneOf([RandomCrop(height=self.args.img_size[0], width=self.args.img_size[1], p=0.5),
                #        RandomCrop(height=int(random_scale * self.args.img_size[0]),
                #                   width=int(random_scale * self.args.img_size[1]), p=0.5)], p=1.0),
                # making sure resize fits with yolo input size
                Resize(height=self.args.img_size[0],
                       width=self.args.img_size[1],
                       p=1.0),
                Normalize(p=1.0)
            ])

        perform_augmentation = Compose(transforms,
                                       bbox_params=BboxParams(
                                           format='yolo', min_visibility=0.3))
        augmented_sample = perform_augmentation(**annotations)

        augmented_sample["bboxes"] = np.array(augmented_sample["bboxes"])

        return augmented_sample
Example #16
def train_transform(
    from_dicom: bool,
    longest_max_size: int,
    additional_transforms: List[BasicTransform] = None
) -> Compose:
    additional_transforms = additional_transforms if additional_transforms is not None else []
    initial_transforms = [
        LongestMaxSize(longest_max_size),
    ]
    if from_dicom:
        initial_transforms.insert(0, Lambda(image=stack_channels_for_rgb))
    final_transforms = [
        ToFloat(),
        ToTensorV2(),
    ]
    transforms = initial_transforms + additional_transforms + final_transforms
    return Compose(transforms, bbox_params=BboxParams(format='pascal_voc', label_fields=['labels']))
Example #17
    def __init__(self, train, size=(224, 224)):
        # transforms = [Resize(*size)]
        transforms = []

        if train:
            # default p=0.5
            transforms.extend([Flip(), Rotate()])

        # normalize default imagenet
        # mean (0.485, 0.456, 0.406)
        # std (0.229, 0.224, 0.225)
        # transforms.append(Normalize())

        self.aug = Compose(transforms)
        self.aug_train = Compose(transforms,
                                 bbox_params=BboxParams(
                                     format='pascal_voc',
                                     label_fields=['labels']))
Example #18
File: gen.py  Project: stdbreaks/centernet
def strong_aug(p=0.6):
    return Compose([
        RandomShadow(shadow_roi=(0, 0, 1, 1), p=0.75),
        OneOf([MotionBlur(), GaussianBlur()]),
        OneOf([ToGray(), ToSepia()]),
        OneOf([
            InvertImg(),
            RandomBrightnessContrast(brightness_limit=0.75, p=0.75),
            RandomGamma(),
            HueSaturationValue()
        ],
              p=0.75)
    ],
                   bbox_params=BboxParams("pascal_voc",
                                          label_fields=["category_id"],
                                          min_area=0.0,
                                          min_visibility=0.0),
                   p=p)
Example #19
    def __init__(self,
                 data_dir,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_workers=1,
                 training=True):
        # data transformation
        # According to this link: https://discuss.pytorch.org/t/normalization-of-input-image/34814/8
        # for satellite imagery 0.5 is fine; otherwise calculate mean and std over the whole dataset
        # calculated mean and std using method from util
        data_transforms = Compose(
            [
                Resize(256, 256),
                HorizontalFlip(),
                OneOf([
                    IAAAdditiveGaussianNoise(),
                    GaussNoise(),
                ], p=0.2),
                OneOf([
                    CLAHE(clip_limit=2),
                    IAASharpen(),
                    IAAEmboss(),
                    RandomBrightnessContrast(),
                ],
                      p=0.3),
                HueSaturationValue(p=0.3),
                Normalize(  #mean std for potsdam dataset from COWC [Calculate also for spot6]
                    mean=[0.3442, 0.3708, 0.3476],
                    std=[0.1232, 0.1230, 0.1284])
            ],
            bbox_params=BboxParams(format='pascal_voc',
                                   min_area=0,
                                   min_visibility=0,
                                   label_fields=['labels']))

        self.data_dir = data_dir
        self.dataset = COWCDataset(self.data_dir, transform=data_transforms)
        super().__init__(self.dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         collate_fn=collate_fn)
Example #20
    def get_aug(self, label_field, min_area=0.0, min_visibility=0.0):
        """returns a function that applies the list of transformations.

        Args:
            label_field (str): The feild in the dictionary that contains the
                    name labels
            min_area (float, optional): minimum area of bbox that is considered
            min_visibility (float, optional): minimum area of bbox to be
                     visible

        Returns:
            function: function to apply list of agumentations on images and
                     bboxes
        """
        return Compose(self.aug,
                       bbox_params=BboxParams(format=self.annotation_format,
                                              min_area=min_area,
                                              min_visibility=min_visibility,
                                              label_fields=[label_field]),
                       p=self.p)
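To make the min_visibility filtering concrete, a small illustrative sketch (the transform, names, and numbers are made up, not taken from this project): a crop that leaves only 20% of a box visible removes it when the threshold is 0.5.

import numpy as np
from albumentations import Compose, Crop, BboxParams

aug = Compose([Crop(x_min=0, y_min=0, x_max=100, y_max=100)],
              bbox_params=BboxParams(format='pascal_voc',
                                     min_visibility=0.5,
                                     label_fields=['labels']))
image = np.zeros((200, 200, 3), dtype=np.uint8)
# the box spans x = 80..180, so only 20 of its 100 pixels of width survive the 100x100 crop
out = aug(image=image, bboxes=[[80, 10, 180, 90]], labels=[1])
print(out['bboxes'])   # [] -- visibility 0.2 < 0.5, so the box (and its label) is dropped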
Example #21
File: data.py  Project: jwparktom/beetect
    def __init__(self, train, size=(224, 224)):
        transforms = [Resize(*size)]

        if train:
            # default p=0.5
            transforms.extend([
                Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225),
                          p=1),
                Flip(),
                Rotate()
            ])

        # transforms.extend([
        #     ToTensor()
        # ])

        self.aug = Compose(transforms)
        self.aug_train = Compose(transforms,
                                 bbox_params=BboxParams(
                                     format='pascal_voc',
                                     label_fields=['labels']))
Example #22
 def __data_parse(self, ID):
     vars_dict = {}
     data = pd.read_excel(os.path.join('datasets', self.dataset,
                                       self.subset, ID + '.xlsx'),
                          sheet_name=None)
     for series_num, (sheet_name, df) in enumerate(data.items()):
         lines = [[
             float(x)
             if x.replace('.', '').replace('-', '').isnumeric() else x
             for x in l.split(',')
         ][1:] for l in df.to_csv().split('\n')][:-1]
         vars_dict.update(
             self.__parse_global(lines[:2], self.global_vars, series_num,
                                 'global_vars'))
         #vars_dict.update(self.__parse_objects(lines[2:], self.objects_vars, series_num, 'objects_vars'))
     transformed = {k: v['value'] for k, v in vars_dict.items()}
     transform = Compose(
         self.augment_params,
         additional_targets={k: v['type']
                             for k, v in vars_dict.items()},
         keypoint_params=KeypointParams(format='xy',
                                        remove_invisible=False),
         bbox_params=BboxParams(format='albumentations',
                                min_visibility=0.1),
         p=0.5)
     transformed = transform(
         image=np.array([[0, 0, 0]], dtype='uint8'),
         bboxes=[],
         keypoints=[],
         **{k: v['value']
            for k, v in vars_dict.items()})
     for k, v in transformed.items():
         if k in ['image', 'bboxes', 'keypoints']:
             continue
         vars_dict[k].update({'value': v})
     return vars_dict
Example #23
    def __init__(self,
                 image_fns,
                 gt_boxes=None,
                 label_to_int=None,
                 augment=False,
                 train_image_dir='train_images',
                 test_image_dir='test_images',
                 height=1536,
                 width=1536,
                 feature_scale=0.25):
        self.image_fns = image_fns
        self.gt_boxes = gt_boxes
        self.label_to_int = label_to_int
        self.augment = augment
        self.aug = Compose([
            ShiftScaleRotate(p=0.9,
                             rotate_limit=10,
                             scale_limit=0.2,
                             border_mode=cv2.BORDER_CONSTANT),
            RandomCrop(512, 512, p=1.0),
            ToGray(),
            CLAHE(),
            GaussNoise(),
            GaussianBlur(),
            RandomBrightnessContrast(),
            RandomGamma(),
            RGBShift(),
            HueSaturationValue(),
        ],
                           bbox_params=BboxParams(format='coco',
                                                  min_visibility=0.75))

        self.encoded_cache = None
        self.height = height
        self.width = width
        self.feature_scale = feature_scale
Example #24
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  batch_size=16,
                  epochs=13,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  image_size=512,
                  num_classes=90):
    # pipeline
    train_ds, eval_ds = mscoco.load_data(root_dir=data_dir)
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=eval_ds,
        batch_size=batch_size,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            LongestMaxSize(image_size,
                           image_in="image",
                           image_out="image",
                           bbox_in="bbox",
                           bbox_out="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0)),
            PadIfNeeded(
                image_size,
                image_size,
                border_mode=cv2.BORDER_CONSTANT,
                image_in="image",
                image_out="image",
                bbox_in="bbox",
                bbox_out="bbox",
                bbox_params=BboxParams("coco", min_area=1.0),
            ),
            Sometimes(
                HorizontalFlip(mode="train",
                               image_in="image",
                               image_out="image",
                               bbox_in="bbox",
                               bbox_out="bbox",
                               bbox_params='coco')),
            # normalize from uint8 to [-1, 1]
            Normalize(inputs="image",
                      outputs="image",
                      mean=1.0,
                      std=1.0,
                      max_pixel_value=127.5),
            ShiftLabel(inputs="bbox", outputs="bbox"),
            AnchorBox(inputs="bbox",
                      outputs="anchorbox",
                      width=image_size,
                      height=image_size),
            ChannelTranspose(inputs="image", outputs="image")
        ],
        pad_value=0)
    # network
    model = fe.build(model_fn=lambda: RetinaNet(num_classes=num_classes),
                     optimizer_fn=lambda x: torch.optim.SGD(
                         x, lr=2e-4, momentum=0.9, weight_decay=0.0001))
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="image", outputs=["cls_pred", "loc_pred"]),
        RetinaLoss(inputs=["anchorbox", "cls_pred", "loc_pred"],
                   outputs=["total_loss", "focal_loss", "l1_loss"]),
        UpdateOp(model=model, loss_name="total_loss"),
        PredictBox(input_shape=(image_size, image_size, 3),
                   inputs=["cls_pred", "loc_pred"],
                   outputs="pred",
                   mode="eval")
    ])
    # estimator
    traces = [
        LRScheduler(model=model, lr_fn=lr_fn),
        BestModelSaver(model=model,
                       save_dir=model_dir,
                       metric='mAP',
                       save_best_mode="max"),
        MeanAveragePrecision(num_classes=num_classes,
                             true_key='bbox',
                             pred_key='pred',
                             mode="eval")
    ]
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=traces,
        max_train_steps_per_epoch=max_train_steps_per_epoch,
        max_eval_steps_per_epoch=max_eval_steps_per_epoch,
        monitor_names=["l1_loss", "focal_loss"])
    return estimator
Example #25
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  epochs=200,
                  batch_size_per_gpu=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir)
    train_ds = PreMosaicDataset(mscoco_ds=train_ds)
    batch_size = num_device * batch_size_per_gpu
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        ops=[
            ReadImage(inputs=("image1", "image2", "image3", "image4"),
                      outputs=("image1", "image2", "image3", "image4"),
                      mode="train"),
            ReadImage(inputs="image", outputs="image", mode="eval"),
            LongestMaxSize(max_size=640,
                           image_in="image1",
                           bbox_in="bbox1",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image2",
                           bbox_in="bbox2",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image3",
                           bbox_in="bbox3",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image4",
                           bbox_in="bbox4",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image",
                           bbox_in="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="eval"),
            PadIfNeeded(min_height=640,
                        min_width=640,
                        image_in="image",
                        bbox_in="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0),
                        mode="eval",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=(114, 114, 114)),
            CombineMosaic(inputs=("image1", "image2", "image3", "image4",
                                  "bbox1", "bbox2", "bbox3", "bbox4"),
                          outputs=("image", "bbox"),
                          mode="train"),
            CenterCrop(height=640,
                       width=640,
                       image_in="image",
                       bbox_in="bbox",
                       bbox_params=BboxParams("coco", min_area=1.0),
                       mode="train"),
            Sometimes(
                HorizontalFlip(image_in="image",
                               bbox_in="bbox",
                               bbox_params=BboxParams("coco", min_area=1.0),
                               mode="train")),
            HSVAugment(inputs="image", outputs="image", mode="train"),
            ToArray(inputs="bbox", outputs="bbox", dtype="float32"),
            CategoryID2ClassID(inputs="bbox", outputs="bbox"),
            GTBox(inputs="bbox",
                  outputs=("gt_sbbox", "gt_mbbox", "gt_lbbox"),
                  image_size=640),
            Delete(keys=("image1", "image2", "image3", "image4", "bbox1",
                         "bbox2", "bbox3", "bbox4", "bbox"),
                   mode="train"),
            Delete(keys="image_id", mode="eval"),
            Batch(batch_size=batch_size, pad_value=0)
        ])
    init_lr = 1e-2 / 64 * batch_size
    model = fe.build(
        lambda: YoloV5(w=640, h=640, c=3),
        optimizer_fn=lambda x: torch.optim.SGD(
            x, lr=init_lr, momentum=0.937, weight_decay=0.0005, nesterov=True),
        mixed_precision=True)
    network = fe.Network(ops=[
        RescaleTranspose(inputs="image", outputs="image"),
        ModelOp(model=model,
                inputs="image",
                outputs=("pred_s", "pred_m", "pred_l")),
        DecodePred(inputs=("pred_s", "pred_m", "pred_l"),
                   outputs=("pred_s", "pred_m", "pred_l")),
        ComputeLoss(inputs=("pred_s", "gt_sbbox"),
                    outputs=("sbbox_loss", "sconf_loss", "scls_loss")),
        ComputeLoss(inputs=("pred_m", "gt_mbbox"),
                    outputs=("mbbox_loss", "mconf_loss", "mcls_loss")),
        ComputeLoss(inputs=("pred_l", "gt_lbbox"),
                    outputs=("lbbox_loss", "lconf_loss", "lcls_loss")),
        Average(inputs=("sbbox_loss", "mbbox_loss", "lbbox_loss"),
                outputs="bbox_loss"),
        Average(inputs=("sconf_loss", "mconf_loss", "lconf_loss"),
                outputs="conf_loss"),
        Average(inputs=("scls_loss", "mcls_loss", "lcls_loss"),
                outputs="cls_loss"),
        Average(inputs=("bbox_loss", "conf_loss", "cls_loss"),
                outputs="total_loss"),
        PredictBox(width=640,
                   height=640,
                   inputs=("pred_s", "pred_m", "pred_l"),
                   outputs="box_pred",
                   mode="eval"),
        UpdateOp(model=model, loss_name="total_loss")
    ])
    traces = [
        MeanAveragePrecision(num_classes=80,
                             true_key='bbox',
                             pred_key='box_pred',
                             mode="eval"),
        BestModelSaver(model=model,
                       save_dir=model_dir,
                       metric='mAP',
                       save_best_mode="max")
    ]
    lr_schedule = {
        1:
        LRScheduler(model=model,
                    lr_fn=lambda step: lr_schedule_warmup(
                        step,
                        train_steps_epoch=np.ceil(len(train_ds) / batch_size),
                        init_lr=init_lr)),
        4:
        LRScheduler(model=model,
                    lr_fn=lambda epoch: cosine_decay(epoch,
                                                     cycle_length=epochs - 3,
                                                     init_lr=init_lr,
                                                     min_lr=init_lr / 100,
                                                     start=4))
    }
    traces.append(EpochScheduler(lr_schedule))
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=traces,
        monitor_names=["bbox_loss", "conf_loss", "cls_loss"],
        train_steps_per_epoch=train_steps_per_epoch,
        eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
Example #26
 def __init__(self, is_train: bool, to_pytorch: bool, preprocess: callable):
     super().__init__(is_train, to_pytorch, preprocess)
     self._aug = Compose([self._aug],
                         bbox_params=BboxParams(
                             format='coco', label_fields=['category_ids']))
Example #27
def get_aug(aug):
    return Compose(aug,
                   bbox_params=BboxParams(format="pascal_voc",
                                          label_fields=["gt_labels"]))
Example #28
def get_aug(aug, min_area=0., min_visibility=0.25):
    return Compose(aug,
                   bbox_params=BboxParams(format='coco',
                                          min_area=min_area,
                                          min_visibility=min_visibility,
                                          label_fields=['category_id']))
Example #29
def compose(transforms_to_compose):
    # combine all augmentations into one single pipeline
    result = albu.Compose(
        [item for sublist in transforms_to_compose for item in sublist],
        bbox_params=BboxParams(format='coco', label_fields=['category_id']))
    return result
 def get_bbox_params(self):
     return BboxParams(format='coco', label_fields=['category_id'])