def get_test_transforms():
    return A.Compose([
        A.Resize(Config.Train.img_size, Config.Train.img_size, p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params=BboxParams('pascal_voc', label_fields=['labels']))
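# Usage sketch for get_test_transforms (hedged: `Config.Train.img_size` and the
# `A` import alias come from the surrounding project, so this call is
# illustrative only):
#   out = get_test_transforms()(image=img_hwc_uint8,
#                               bboxes=[[x_min, y_min, x_max, y_max]],  # pascal_voc
#                               labels=[class_id])
#   out['image'] is a CHW torch.Tensor after ToTensorV2.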
def get_train_transforms():
    # noinspection PyTypeChecker
    return A.Compose([
        A.RandomSizedCrop(min_max_height=(850, 850), height=1024, width=1024, p=0.3),
        A.OneOf([
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                 val_shift_limit=0.2, p=0.8),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9)
        ], p=0.5),
        A.OneOf([
            A.RandomRain(rain_type='drizzle', p=0.2),
            A.GaussianBlur(blur_limit=7, p=0.5),
            A.GaussNoise((0.2, 0.25), p=0.3),
            A.RandomShadow(p=0.2)
        ], p=0.4),
        A.ToGray(p=0.01),
        A.Flip(p=0.5),
        A.CoarseDropout(max_height=64, max_width=64, min_holes=3,
                        min_height=32, min_width=32, p=0.5),
        A.Resize(Config.Train.img_size, Config.Train.img_size, p=1.0),
        ToTensorV2(p=1.0),
    ], bbox_params=BboxParams('pascal_voc', label_fields=['labels'], min_visibility=0.0))
def get_aug(aug, min_area=0., min_visibility=0.):
    # Adjust `format` here to match the bbox format of your dataset.
    return Compose(aug, bbox_params=BboxParams(format='pascal_voc',
                                               min_area=min_area,
                                               min_visibility=min_visibility,
                                               label_fields=['category_id']))
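# A minimal usage sketch for get_aug above (the HorizontalFlip choice and the
# dummy image/boxes are assumptions added for illustration, not original code).
import numpy as np
from albumentations import HorizontalFlip

flip_aug = get_aug([HorizontalFlip(p=1.0)])
out = flip_aug(image=np.zeros((100, 200, 3), dtype=np.uint8),
               bboxes=[(10, 20, 60, 80)],  # pascal_voc: x_min, y_min, x_max, y_max
               category_id=[1])
# out['bboxes'] holds the flipped boxes; any box filtered out by min_area /
# min_visibility also loses its entry in out['category_id'].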
def build_transforms(cfg, mode='train', norm_image=True):
    assert mode in ['train', 'test', 'val']
    min_size = cfg.SCALES[0]
    max_size = cfg.SCALES[1]
    assert min_size <= max_size

    if mode == 'train':
        flip_prob = cfg.TRAIN.FLIP_PROB
    elif mode == 'test':
        flip_prob = cfg.TEST.FLIP_PROB
    else:
        flip_prob = cfg.VAL.FLIP_PROB

    to_bgr255 = True
    normalize_transform = T.Normalize(
        mean=cfg.NETWORK.PIXEL_MEANS, std=cfg.NETWORK.PIXEL_STDS, to_bgr255=to_bgr255
    )
    # transform = T.Compose(
    #     [
    #         T.Resize(min_size, max_size),
    #         T.RandomHorizontalFlip(flip_prob),
    #         T.ToTensor(),
    #         normalize_transform,
    #         T.FixPadding(min_size, max_size, pad=0)
    #     ]
    # )
    bbox_params = BboxParams(format='pascal_voc',
                             min_area=0,
                             min_visibility=0.2,
                             label_fields=['fake_label'])
    album_augs = [
        HorizontalFlip(p=0.5),
        # RandomBrightness(limit=0.2, p=0.5),
        # RandomContrast(limit=0.2, p=0.5),
        RandomScale(scale_limit=(-0.3, 0.0), p=0.3),
        # MedianBlur(blur_limit=5, p=0.3),
        # Rotate(limit=30, p=0.25),
    ]
    album_augs = Compose(album_augs, bbox_params=bbox_params)

    if mode == 'train':
        all_augs = [
            T.Resize(min_size, max_size),
            T.ToTensor(),
            album_augs,
        ]
    else:
        all_augs = [
            T.Resize(min_size, max_size),
            T.ToTensor(),
        ]
    if norm_image:
        all_augs.append(normalize_transform)
    transform = T.Compose(all_augs)
    return transform
def light_aug_detection(p=1.0):
    return Compose([
        Flip(p=1),
        Transpose(p=0.8),
        ShiftScaleRotate(shift_limit=0.01, scale_limit=0.01, rotate_limit=30, p=1),
        RandomBrightnessContrast(brightness_limit=(-0.01, 0.01),
                                 contrast_limit=(-0.01, 0.01), p=p),
    ], bbox_params=BboxParams(format='pascal_voc', label_fields=['category_ids']), p=p)
def __init__(self, data_dir_GT, data_dir_LQ, batch_size, shuffle=True,
             validation_split=0.0, num_workers=1, training=True):
    # Data transformation.
    # According to https://discuss.pytorch.org/t/normalization-of-input-image/34814/8,
    # 0.5 is good for satellite images; otherwise calculate mean and std for the
    # whole dataset. Calculated mean and std using the method from util.
    '''
    Data transform for GAN training
    '''
    data_transforms_train = Compose(
        [
            HorizontalFlip(),
            Normalize(  # mean/std for the Potsdam dataset from COWC [calculate also for SPOT-6]
                mean=[0.3442, 0.3708, 0.3476],
                std=[0.1232, 0.1230, 0.1284])
        ],
        additional_targets={'image_lq': 'image'},
        bbox_params=BboxParams(format='pascal_voc', min_area=0, min_visibility=0,
                               label_fields=['labels']))
    data_transforms_test = Compose(
        [
            Normalize(  # mean/std for the Potsdam dataset from COWC [calculate also for SPOT-6]
                mean=[0.3442, 0.3708, 0.3476],
                std=[0.1232, 0.1230, 0.1284])
        ],
        additional_targets={'image_lq': 'image'})
    self.data_dir_gt = data_dir_GT
    self.data_dir_lq = data_dir_LQ
    if training:
        self.dataset = COWCGANDataset(self.data_dir_gt, self.data_dir_lq,
                                      transform=data_transforms_train)
    else:
        self.dataset = COWCGANDataset(self.data_dir_gt, self.data_dir_lq,
                                      transform=data_transforms_test)
    self.length = len(self.dataset)
    super().__init__(self.dataset, batch_size, shuffle, validation_split,
                     num_workers, collate_fn=collate_fn)
def test_input(self):
    safecrop = RandomSizedBBoxSafeCrop(image_in='x',
                                       height=self.height,
                                       width=self.width,
                                       bbox_in="x_bbox",
                                       bbox_params=BboxParams("coco"))
    output = safecrop.forward(data=self.single_input, state={})
    with self.subTest('Check output type'):
        self.assertEqual(type(output), list)
    with self.subTest('Check output image shape'):
        self.assertEqual(output[0].shape, self.single_output_shape)
def __getitem__(self, idx):
    img = Image.open(self._images_path[idx])
    img = np.array(img)

    # Read keypoints; the annotation file stores three lines: type - x - y.
    keypoints = []
    with open(self._keypoints_path[idx], 'r') as f:
        c, x, y = f.read().strip().split('\n')
    keypoints.append([self._opt['classes mapping'][c], float(x), float(y)])

    # coco format -> yolo format
    bbox_size = 32  # configurable parameter
    h, w = img.shape[:2]
    bboxes = [[kp[1], kp[2], bbox_size, bbox_size] for kp in keypoints]
    normalized_bboxes = [[bbox[0] / w, bbox[1] / h, bbox[2] / w, bbox[3] / h]
                         for bbox in bboxes]

    # Pad/resize so both sides are multiples of 32. Work on a copy of the
    # transform list so repeated __getitem__ calls do not keep appending to it.
    transforms = list(self._transforms)
    old_h, old_w = img.shape[:2]
    new_h, new_w = old_h, old_w
    if img.shape[1] % 32 != 0:
        new_w = old_w + (32 - old_w % 32)
        ratio = new_w / old_w
        new_h = int(old_h * ratio)
        transforms.append(Resize(new_h, new_w))
    if img.shape[0] % 32 != 0:
        new_h = new_h + (32 - new_h % 32)
        transforms.append(PadIfNeeded(new_h, new_w))
    transforms.append(Normalize(p=1.0))

    aug = Compose(transforms,
                  bbox_params=BboxParams(format='yolo', label_fields=['category_id']))
    res = aug(image=img,
              bboxes=normalized_bboxes,
              category_id=[kp[0] for kp in keypoints])
    img = res['image']
    bboxes = res['bboxes']
    classes_id = res['category_id']

    img = img.transpose(2, 0, 1)
    labels = [[0, classes_id[i], bbox[0], bbox[1], bbox[2], bbox[3]]
              for i, bbox in enumerate(bboxes)]
    return (torch.from_numpy(img), torch.tensor(labels),
            self._images_path[idx], None)
def make_transforms(transform_dict):
    if transform_dict is None:
        return None
    transforms = []
    bbox_args = transform_dict.pop('BboxParams',
                                   {'format': 'pascal_voc', 'label_fields': ['labels']})
    for name, kwargs in transform_dict.items():
        transforms.append(instantiate(name, kwargs))
    return Compose(transforms=transforms, bbox_params=BboxParams(**bbox_args))
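# Hedged usage sketch for make_transforms above. The project's `instantiate`
# helper is not shown in this snippet, so the stand-in below (a pure
# assumption, not the original helper) resolves transform names from the
# albumentations namespace just to make the example self-contained.
import albumentations

def instantiate(name, kwargs):  # hypothetical stand-in for the project helper
    return getattr(albumentations, name)(**kwargs)

example_transform = make_transforms({
    'HorizontalFlip': {'p': 0.5},
    'Resize': {'height': 512, 'width': 512},
    'BboxParams': {'format': 'coco', 'label_fields': ['labels']},
})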
def get_aug(min_area=0., min_visibility=0.):
    # Compose expects a list of transforms, so the OneOf block is wrapped in one.
    return Compose(
        [OneOf([
            RandomContrast(p=0.1, limit=(-0.5, 1)),  # -0.5 ~ 2 -- RandomBrightnessContrast
            RandomBrightness(p=0.05, limit=(-0.2, 0.1)),
            HorizontalFlip(p=0.0),
        ], p=0.5)],
        bbox_params=BboxParams(format='pascal_voc',
                               min_area=min_area,
                               min_visibility=min_visibility,
                               label_fields=['category_id'])
    )
def get_aug(self, aug, min_area=0., min_visibility=0.3):
    """
    Args:
        aug - set of albumentation augmentations
        min_area - minimum area to keep a bbox
        min_visibility - minimum fraction of the original bbox area that must
            remain after the transform for the bbox to be kept
    """
    return Compose(aug, bbox_params=BboxParams(format='coco',
                                               min_area=min_area,
                                               min_visibility=min_visibility,
                                               label_fields=['category_id']))
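# Hedged illustration of the min_visibility filter described in the docstring
# above: cropping so that a COCO-format box keeps less than 30% of its area
# removes it from the output (the CenterCrop and the toy data are assumptions
# added for the demo, not part of the original code).
import numpy as np
from albumentations import BboxParams, CenterCrop, Compose

demo = Compose([CenterCrop(50, 50, p=1.0)],
               bbox_params=BboxParams(format='coco', min_visibility=0.3,
                                      label_fields=['category_id']))
res = demo(image=np.zeros((100, 100, 3), dtype=np.uint8),
           bboxes=[(0, 0, 20, 20)],  # coco: x_min, y_min, width, height
           category_id=[7])
# The box lies entirely outside the 50x50 center crop, so res['bboxes'] == [].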
def __init__(self,
             func: DualTransform,
             mode: Union[None, str, Iterable[str]] = None,
             ds_id: Union[None, str, Iterable[str]] = None,
             image_in: Optional[str] = None,
             mask_in: Optional[str] = None,
             masks_in: Optional[str] = None,
             bbox_in: Optional[str] = None,
             keypoints_in: Optional[str] = None,
             image_out: Optional[str] = None,
             mask_out: Optional[str] = None,
             masks_out: Optional[str] = None,
             bbox_out: Optional[str] = None,
             keypoints_out: Optional[str] = None,
             bbox_params: Union[BboxParams, str, None] = None,
             keypoint_params: Union[KeypointParams, str, None] = None,
             extra_in_keys: Optional[Dict[str, str]] = None,
             extra_out_keys: Optional[Dict[str, str]] = None):
    assert any((image_in, mask_in, masks_in, bbox_in, keypoints_in)), \
        "At least one input must be non-None"
    image_out = image_out or image_in
    mask_out = mask_out or mask_in
    masks_out = masks_out or masks_in
    bbox_out = bbox_out or bbox_in
    keypoints_out = keypoints_out or keypoints_in
    keys = OrderedDict([("image", image_in), ("mask", mask_in), ("masks", masks_in),
                        ("bboxes", bbox_in), ("keypoints", keypoints_in)])
    if extra_in_keys:
        keys.update(extra_in_keys)
    self.keys_in = OrderedDict([(k, v) for k, v in keys.items() if v is not None])
    keys = OrderedDict([("image", image_out), ("mask", mask_out), ("masks", masks_out),
                        ("bboxes", bbox_out), ("keypoints", keypoints_out)])
    if extra_out_keys:
        keys.update(extra_out_keys)
    self.keys_out = OrderedDict([(k, v) for k, v in keys.items() if v is not None])
    super().__init__(inputs=list(self.keys_in.values()),
                     outputs=list(self.keys_out.values()),
                     mode=mode,
                     ds_id=ds_id)
    if isinstance(bbox_params, str):
        bbox_params = BboxParams(bbox_params)
    if isinstance(keypoint_params, str):
        keypoint_params = KeypointParams(keypoint_params)
    self.func = Compose(transforms=[func],
                        bbox_params=bbox_params,
                        keypoint_params=keypoint_params)
def __init__(self, box_format='coco'):
    self.tsfm = Compose(
        [
            HorizontalFlip(),
            # RandomResizedCrop(512, 512, scale=(0.75, 1)),
            RandomBrightnessContrast(0.4, 0.4),
            GaussNoise(),
            RGBShift(),
            CLAHE(),
            RandomGamma()
        ],
        bbox_params=BboxParams(format=box_format,
                               min_visibility=0.75,
                               label_fields=['labels']))
def train_multi_augment12(image, bboxes=None, category_id=None):
    h, w = image.shape[0], image.shape[1]
    if bboxes is not None:
        aug = Compose(
            [
                HorizontalFlip(p=0.5),
                ShiftScaleRotate(
                    shift_limit=0.05,
                    scale_limit=0.05,
                    rotate_limit=5,
                    border_mode=cv2.BORDER_REPLICATE,
                    p=1,
                ),
                RandomSizedCrop(min_max_height=(int(h * 0.9), h),
                                height=h, width=w, p=0.25),
                RandomBrightnessContrast(brightness_limit=0.0,
                                         contrast_limit=0.3, p=0.25),
            ],
            p=1,
            bbox_params=BboxParams(format="pascal_voc", label_fields=["category_id"]),
        )
        augmented = aug(image=image, bboxes=bboxes, category_id=category_id)
    else:  # Normal
        aug = Compose(
            [
                HorizontalFlip(p=0.5),
                ShiftScaleRotate(
                    shift_limit=0.05,
                    scale_limit=0.05,
                    rotate_limit=5,
                    border_mode=cv2.BORDER_REPLICATE,
                    p=1,
                ),
                RandomSizedCrop(min_max_height=(int(h * 0.9), h),
                                height=h, width=w, p=0.25),
                RandomBrightnessContrast(brightness_limit=0.3,
                                         contrast_limit=0.3, p=0.25),
            ],
            p=1,
        )
        augmented = aug(image=image)
    return augmented
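# Quick demonstration of the two branches of train_multi_augment12 (the dummy
# image and box below are assumptions added for the demo, not original code).
import numpy as np

demo_img = np.zeros((64, 64, 3), dtype=np.uint8)
with_boxes = train_multi_augment12(demo_img,
                                   bboxes=[(4, 4, 32, 32)],  # pascal_voc
                                   category_id=[0])
image_only = train_multi_augment12(demo_img)  # falls back to the bbox-free pipeline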
def transform_val(self, sample):
    """
    data augmentation for validation
    """
    img = sample["image"]
    bboxes = sample["bboxes"]
    imgH = img.shape[0]
    imgW = img.shape[1]
    if imgW / imgH < 2.5:
        scale_factor = min(self.args.img_size[0] / imgH,
                           self.args.img_size[1] / imgW)
    else:
        scale_factor = 1.0
    random_scale = np.random.randint(8, 11) / 10
    if bboxes.size == 0:
        # this is just a dummy - all values must be inside (0, 1)
        bboxes = np.array([[0.1, 0.1, 0.1, 0.1, 0.0, 0.0]])
    annotations = {'image': img, 'bboxes': bboxes}
    transforms = [
        # Resize(height=int(scale_factor * imgH), width=int(scale_factor * imgW), p=1.0),
        # PadIfNeeded(min_height=self.args.img_size[0], min_width=self.args.img_size[1],
        #             border_mode=cv2.BORDER_REPLICATE, p=1.0),
        # changing image size - maintaining aspect ratio for a later resize
        # OneOf([RandomCrop(height=self.args.img_size[0], width=self.args.img_size[1], p=0.5),
        #        RandomCrop(height=int(random_scale * self.args.img_size[0]),
        #                   width=int(random_scale * self.args.img_size[1]), p=0.5)], p=1.0),
        # making sure the resize fits the yolo input size
        Resize(height=self.args.img_size[0], width=self.args.img_size[1], p=1.0),
        Normalize(p=1.0)
    ]
    perform_augmentation = Compose(transforms,
                                   bbox_params=BboxParams(format='yolo',
                                                          min_visibility=0.3))
    augmented_sample = perform_augmentation(**annotations)
    augmented_sample["bboxes"] = np.array(augmented_sample["bboxes"])
    return augmented_sample
def train_transform(from_dicom: bool,
                    longest_max_size: int,
                    additional_transforms: List[BasicTransform] = None) -> Compose:
    additional_transforms = additional_transforms if additional_transforms is not None else []
    initial_transforms = [
        LongestMaxSize(longest_max_size),
    ]
    if from_dicom:
        initial_transforms.insert(0, Lambda(image=stack_channels_for_rgb))
    final_transforms = [
        ToFloat(),
        ToTensorV2(),
    ]
    transforms = initial_transforms + additional_transforms + final_transforms
    return Compose(transforms,
                   bbox_params=BboxParams(format='pascal_voc', label_fields=['labels']))
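# Example call of train_transform above (a sketch; `from_dicom=False` and the
# GaussianBlur extra transform are assumptions for illustration).
from albumentations import GaussianBlur

tfms = train_transform(from_dicom=False,
                       longest_max_size=1024,
                       additional_transforms=[GaussianBlur(p=0.2)])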
def __init__(self, train, size=(224, 224)):
    # transforms = [Resize(*size)]
    transforms = []
    if train:
        # default p=0.5
        transforms.extend([Flip(), Rotate()])
    # Normalize defaults to the ImageNet statistics:
    #   mean (0.485, 0.456, 0.406)
    #   std (0.229, 0.224, 0.225)
    # transforms.append(Normalize())
    self.aug = Compose(transforms)
    self.aug_train = Compose(transforms,
                             bbox_params=BboxParams(format='pascal_voc',
                                                    label_fields=['labels']))
def strong_aug(p=0.6):
    return Compose([
        RandomShadow(shadow_roi=(0, 0, 1, 1), p=0.75),
        OneOf([MotionBlur(), GaussianBlur()]),
        OneOf([ToGray(), ToSepia()]),
        OneOf([
            InvertImg(),
            RandomBrightnessContrast(brightness_limit=0.75, p=0.75),
            RandomGamma(),
            HueSaturationValue()
        ], p=0.75)
    ], bbox_params=BboxParams("pascal_voc",
                              label_fields=["category_id"],
                              min_area=0.0,
                              min_visibility=0.0),
       p=p)
def __init__(self, data_dir, batch_size, shuffle=True, validation_split=0.0,
             num_workers=1, training=True):
    # Data transformation.
    # According to https://discuss.pytorch.org/t/normalization-of-input-image/34814/8,
    # 0.5 is good for satellite images; otherwise calculate mean and std for the
    # whole dataset. Calculated mean and std using the method from util.
    data_transforms = Compose(
        [
            Resize(256, 256),
            HorizontalFlip(),
            OneOf([
                IAAAdditiveGaussianNoise(),
                GaussNoise(),
            ], p=0.2),
            OneOf([
                CLAHE(clip_limit=2),
                IAASharpen(),
                IAAEmboss(),
                RandomBrightnessContrast(),
            ], p=0.3),
            HueSaturationValue(p=0.3),
            Normalize(  # mean/std for the Potsdam dataset from COWC [calculate also for SPOT-6]
                mean=[0.3442, 0.3708, 0.3476],
                std=[0.1232, 0.1230, 0.1284])
        ],
        bbox_params=BboxParams(format='pascal_voc', min_area=0, min_visibility=0,
                               label_fields=['labels']))
    self.data_dir = data_dir
    self.dataset = COWCDataset(self.data_dir, transform=data_transforms)
    super().__init__(self.dataset, batch_size, shuffle, validation_split,
                     num_workers, collate_fn=collate_fn)
def get_aug(self, label_field, min_area=0.0, min_visibility=0.0):
    """Returns a function that applies the list of transformations.

    Args:
        label_field (str): The field in the dictionary that contains the name labels
        min_area (float, optional): minimum area of a bbox that is considered
        min_visibility (float, optional): minimum area of a bbox to be visible

    Returns:
        function: function to apply the list of augmentations on images and bboxes
    """
    return Compose(self.aug,
                   bbox_params=BboxParams(format=self.annotation_format,
                                          min_area=min_area,
                                          min_visibility=min_visibility,
                                          label_fields=[label_field]),
                   p=self.p)
def __init__(self, train, size=(224, 224)):
    transforms = [Resize(*size)]
    if train:
        # default p=0.5
        transforms.extend([
            Normalize(mean=(0.485, 0.456, 0.406),
                      std=(0.229, 0.224, 0.225), p=1),
            Flip(),
            Rotate()
        ])
    # transforms.extend([
    #     ToTensor()
    # ])
    self.aug = Compose(transforms)
    self.aug_train = Compose(transforms,
                             bbox_params=BboxParams(format='pascal_voc',
                                                    label_fields=['labels']))
def __data_parse(self, ID):
    vars_dict = {}
    data = pd.read_excel(os.path.join('datasets', self.dataset, self.subset,
                                      ID + '.xlsx'),
                         sheet_name=None)
    for series_num, (sheet_name, df) in enumerate(data.items()):
        lines = [[float(x) if x.replace('.', '').replace('-', '').isnumeric() else x
                  for x in l.split(',')][1:]
                 for l in df.to_csv().split('\n')][:-1]
        vars_dict.update(self.__parse_global(lines[:2], self.global_vars,
                                             series_num, 'global_vars'))
        # vars_dict.update(self.__parse_objects(lines[2:], self.objects_vars,
        #                                       series_num, 'objects_vars'))
    transformed = {k: v['value'] for k, v in vars_dict.items()}
    transform = Compose(
        self.augment_params,
        additional_targets={k: v['type'] for k, v in vars_dict.items()},
        keypoint_params=KeypointParams(format='xy', remove_invisible=False),
        bbox_params=BboxParams(format='albumentations', min_visibility=0.1),
        p=0.5)
    transformed = transform(image=np.array([[0, 0, 0]], dtype='uint8'),
                            bboxes=[],
                            keypoints=[],
                            **{k: v['value'] for k, v in vars_dict.items()})
    for k, v in transformed.items():
        if k in ['image', 'bboxes', 'keypoints']:
            continue
        vars_dict[k].update({'value': v})
    return vars_dict
def __init__(self, image_fns, gt_boxes=None, label_to_int=None, augment=False,
             train_image_dir='train_images', test_image_dir='test_images',
             height=1536, width=1536, feature_scale=0.25):
    self.image_fns = image_fns
    self.gt_boxes = gt_boxes
    self.label_to_int = label_to_int
    self.augment = augment
    self.aug = Compose([
        ShiftScaleRotate(p=0.9, rotate_limit=10, scale_limit=0.2,
                         border_mode=cv2.BORDER_CONSTANT),
        RandomCrop(512, 512, p=1.0),
        ToGray(),
        CLAHE(),
        GaussNoise(),
        GaussianBlur(),
        RandomBrightnessContrast(),
        RandomGamma(),
        RGBShift(),
        HueSaturationValue(),
    ], bbox_params=BboxParams(format='coco', min_visibility=0.75))
    self.encoded_cache = None
    self.height = height
    self.width = width
    self.feature_scale = feature_scale
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  batch_size=16,
                  epochs=13,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  image_size=512,
                  num_classes=90):
    # pipeline
    train_ds, eval_ds = mscoco.load_data(root_dir=data_dir)
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=eval_ds,
        batch_size=batch_size,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            LongestMaxSize(image_size,
                           image_in="image", image_out="image",
                           bbox_in="bbox", bbox_out="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0)),
            PadIfNeeded(image_size, image_size,
                        border_mode=cv2.BORDER_CONSTANT,
                        image_in="image", image_out="image",
                        bbox_in="bbox", bbox_out="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0)),
            Sometimes(
                HorizontalFlip(mode="train",
                               image_in="image", image_out="image",
                               bbox_in="bbox", bbox_out="bbox",
                               bbox_params='coco')),
            # normalize from uint8 to [-1, 1]
            Normalize(inputs="image", outputs="image",
                      mean=1.0, std=1.0, max_pixel_value=127.5),
            ShiftLabel(inputs="bbox", outputs="bbox"),
            AnchorBox(inputs="bbox", outputs="anchorbox",
                      width=image_size, height=image_size),
            ChannelTranspose(inputs="image", outputs="image")
        ],
        pad_value=0)
    # network
    model = fe.build(model_fn=lambda: RetinaNet(num_classes=num_classes),
                     optimizer_fn=lambda x: torch.optim.SGD(
                         x, lr=2e-4, momentum=0.9, weight_decay=0.0001))
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="image", outputs=["cls_pred", "loc_pred"]),
        RetinaLoss(inputs=["anchorbox", "cls_pred", "loc_pred"],
                   outputs=["total_loss", "focal_loss", "l1_loss"]),
        UpdateOp(model=model, loss_name="total_loss"),
        PredictBox(input_shape=(image_size, image_size, 3),
                   inputs=["cls_pred", "loc_pred"],
                   outputs="pred",
                   mode="eval")
    ])
    # estimator
    traces = [
        LRScheduler(model=model, lr_fn=lr_fn),
        BestModelSaver(model=model, save_dir=model_dir,
                       metric='mAP', save_best_mode="max"),
        MeanAveragePrecision(num_classes=num_classes,
                             true_key='bbox', pred_key='pred', mode="eval")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch,
                             max_eval_steps_per_epoch=max_eval_steps_per_epoch,
                             monitor_names=["l1_loss", "focal_loss"])
    return estimator
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  epochs=200,
                  batch_size_per_gpu=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir)
    train_ds = PreMosaicDataset(mscoco_ds=train_ds)
    batch_size = num_device * batch_size_per_gpu
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        ops=[
            ReadImage(inputs=("image1", "image2", "image3", "image4"),
                      outputs=("image1", "image2", "image3", "image4"),
                      mode="train"),
            ReadImage(inputs="image", outputs="image", mode="eval"),
            LongestMaxSize(max_size=640, image_in="image1", bbox_in="bbox1",
                           bbox_params=BboxParams("coco", min_area=1.0), mode="train"),
            LongestMaxSize(max_size=640, image_in="image2", bbox_in="bbox2",
                           bbox_params=BboxParams("coco", min_area=1.0), mode="train"),
            LongestMaxSize(max_size=640, image_in="image3", bbox_in="bbox3",
                           bbox_params=BboxParams("coco", min_area=1.0), mode="train"),
            LongestMaxSize(max_size=640, image_in="image4", bbox_in="bbox4",
                           bbox_params=BboxParams("coco", min_area=1.0), mode="train"),
            LongestMaxSize(max_size=640, image_in="image", bbox_in="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0), mode="eval"),
            PadIfNeeded(min_height=640, min_width=640,
                        image_in="image", bbox_in="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0), mode="eval",
                        border_mode=cv2.BORDER_CONSTANT, value=(114, 114, 114)),
            CombineMosaic(inputs=("image1", "image2", "image3", "image4",
                                  "bbox1", "bbox2", "bbox3", "bbox4"),
                          outputs=("image", "bbox"),
                          mode="train"),
            CenterCrop(height=640, width=640, image_in="image", bbox_in="bbox",
                       bbox_params=BboxParams("coco", min_area=1.0), mode="train"),
            Sometimes(
                HorizontalFlip(image_in="image", bbox_in="bbox",
                               bbox_params=BboxParams("coco", min_area=1.0),
                               mode="train")),
            HSVAugment(inputs="image", outputs="image", mode="train"),
            ToArray(inputs="bbox", outputs="bbox", dtype="float32"),
            CategoryID2ClassID(inputs="bbox", outputs="bbox"),
            GTBox(inputs="bbox", outputs=("gt_sbbox", "gt_mbbox", "gt_lbbox"),
                  image_size=640),
            Delete(keys=("image1", "image2", "image3", "image4",
                         "bbox1", "bbox2", "bbox3", "bbox4", "bbox"),
                   mode="train"),
            Delete(keys="image_id", mode="eval"),
            Batch(batch_size=batch_size, pad_value=0)
        ])
    init_lr = 1e-2 / 64 * batch_size
    model = fe.build(
        lambda: YoloV5(w=640, h=640, c=3),
        optimizer_fn=lambda x: torch.optim.SGD(
            x, lr=init_lr, momentum=0.937, weight_decay=0.0005, nesterov=True),
        mixed_precision=True)
    network = fe.Network(ops=[
        RescaleTranspose(inputs="image", outputs="image"),
        ModelOp(model=model, inputs="image", outputs=("pred_s", "pred_m", "pred_l")),
        DecodePred(inputs=("pred_s", "pred_m", "pred_l"),
                   outputs=("pred_s", "pred_m", "pred_l")),
        ComputeLoss(inputs=("pred_s", "gt_sbbox"),
                    outputs=("sbbox_loss", "sconf_loss", "scls_loss")),
        ComputeLoss(inputs=("pred_m", "gt_mbbox"),
                    outputs=("mbbox_loss", "mconf_loss", "mcls_loss")),
        ComputeLoss(inputs=("pred_l", "gt_lbbox"),
                    outputs=("lbbox_loss", "lconf_loss", "lcls_loss")),
        Average(inputs=("sbbox_loss", "mbbox_loss", "lbbox_loss"), outputs="bbox_loss"),
        Average(inputs=("sconf_loss", "mconf_loss", "lconf_loss"), outputs="conf_loss"),
        Average(inputs=("scls_loss", "mcls_loss", "lcls_loss"), outputs="cls_loss"),
        Average(inputs=("bbox_loss", "conf_loss", "cls_loss"), outputs="total_loss"),
        PredictBox(width=640, height=640,
                   inputs=("pred_s", "pred_m", "pred_l"),
                   outputs="box_pred",
                   mode="eval"),
        UpdateOp(model=model, loss_name="total_loss")
    ])
    traces = [
        MeanAveragePrecision(num_classes=80, true_key='bbox',
                             pred_key='box_pred', mode="eval"),
        BestModelSaver(model=model, save_dir=model_dir,
                       metric='mAP', save_best_mode="max")
    ]
    lr_schedule = {
        1: LRScheduler(model=model,
                       lr_fn=lambda step: lr_schedule_warmup(
                           step,
                           train_steps_epoch=np.ceil(len(train_ds) / batch_size),
                           init_lr=init_lr)),
        4: LRScheduler(model=model,
                       lr_fn=lambda epoch: cosine_decay(epoch,
                                                        cycle_length=epochs - 3,
                                                        init_lr=init_lr,
                                                        min_lr=init_lr / 100,
                                                        start=4))
    }
    traces.append(EpochScheduler(lr_schedule))
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             monitor_names=["bbox_loss", "conf_loss", "cls_loss"],
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
def __init__(self, is_train: bool, to_pytorch: bool, preprocess: callable):
    super().__init__(is_train, to_pytorch, preprocess)
    self._aug = Compose([self._aug],
                        bbox_params=BboxParams(format='coco',
                                               label_fields=['category_ids']))
def get_aug(aug):
    return Compose(aug, bbox_params=BboxParams(format="pascal_voc",
                                               label_fields=["gt_labels"]))
def get_aug(aug, min_area=0., min_visibility=0.25):
    return Compose(aug, bbox_params=BboxParams(format='coco',
                                               min_area=min_area,
                                               min_visibility=min_visibility,
                                               label_fields=['category_id']))
def compose(transforms_to_compose):
    # Combine all augmentations into one single pipeline.
    result = albu.Compose(
        [item for sublist in transforms_to_compose for item in sublist],
        bbox_params=BboxParams(format='coco', label_fields=['category_id']))
    return result
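# Sketch of the list-of-lists convention compose() flattens (the concrete
# transforms below are illustrative assumptions, not original code).
import albumentations as albu

pre_transforms = [albu.HorizontalFlip(p=0.5)]
post_transforms = [albu.RandomBrightnessContrast(p=0.3)]
pipeline = compose([pre_transforms, post_transforms])  # one flattened Compose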
def get_bbox_params(self):
    return BboxParams(format='coco', label_fields=['category_id'])
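# Minimal sketch of reusing get_bbox_params across pipelines inside the same
# class (the Compose calls and transform choices are illustrative assumptions):
#   train_tfms = Compose([HorizontalFlip(p=0.5)], bbox_params=self.get_bbox_params())
#   val_tfms = Compose([Resize(512, 512)], bbox_params=self.get_bbox_params())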