def mapper(dataset_dict):  # custom mapper
    dataset_dict = copy.deepcopy(dataset_dict)  # the dict is modified below, so copy it first
    image = utils.read_image(dataset_dict["file_name"], format="BGR")  # read the image as a numpy array
    # Heavier augmentations (rotation, vertical flip, relative-range crop) were tried
    # here and commented out; only the combination below is active.
    image, transforms = T.apply_transform_gens(
        [T.Resize((800, 800)), T.RandomContrast(0.1, 3), T.RandomSaturation(0.1, 2),
         T.RandomFlip(prob=0.4, horizontal=True, vertical=False)], image)  # data augmentation
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))  # convert to tensor
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]  # keep the annotations in sync with the augmentations
    instances = utils.annotations_to_instances(annos, image.shape[:2])  # annotations -> Instances (tensors)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)  # drop empty instances
    return dataset_dict
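A function-style mapper like this is typically plugged into training through `build_detection_train_loader`; a minimal sketch, assuming the stock `DefaultTrainer` from `detectron2.engine`:

from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class Trainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        # Route every training sample through the custom mapper above.
        return build_detection_train_loader(cfg, mapper=mapper)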
def bulb_traffic_light_augmentation(cfg, is_train):
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    if sample_style == "range":
        assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format(
            len(min_size)
        )
    logger = logging.getLogger(__name__)
    augmentation = []
    augmentation.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
    if is_train:
        train_augmentation = [
            # T.RandomFlip(),
            # T.RandomApply(T.RandomFlip(prob=0.5, horizontal=False, vertical=True), prob=0.3),
            # T.RandomApply(T.RandomRotation(angle=[-5, 5]), prob=0.15),
            T.RandomApply(T.RandomContrast(intensity_min=0.8, intensity_max=1.2), prob=0.15),
            T.RandomApply(T.RandomBrightness(intensity_min=0.8, intensity_max=1.2), prob=0.15),
            T.RandomApply(T.RandomSaturation(intensity_min=0.8, intensity_max=1.2), prob=0.15),
            T.RandomApply(T.RandomLighting(0.8), prob=0.15),
        ]
        augmentation.extend(train_augmentation)
        logger.info("Augmentations used in training: " + str(augmentation))
    return augmentation
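One way to consume such an augmentation list is to hand it to the stock `DatasetMapper`; a minimal sketch:

from detectron2.data import DatasetMapper, build_detection_train_loader

def build_loader(cfg):
    # DatasetMapper accepts a prebuilt augmentation list directly.
    mapper = DatasetMapper(cfg, is_train=True,
                           augmentations=bulb_traffic_light_augmentation(cfg, is_train=True))
    return build_detection_train_loader(cfg, mapper=mapper)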
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([
        T.RandomFlip(prob=0.50, horizontal=True, vertical=False),
        T.RandomApply(tfm_or_aug=T.RandomBrightness(intensity_min=0.7, intensity_max=1.1), prob=0.40),
        T.RandomApply(tfm_or_aug=T.RandomSaturation(intensity_min=0.7, intensity_max=1.1), prob=0.40)
    ], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def build_transform_gen(cfg, is_train):
    """
    Create a list of :class:`TransformGen` from config.
    Now it includes resizing and flipping.

    Returns:
        list[TransformGen]
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    if sample_style == "range":
        assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format(
            len(min_size)
        )
    logger = logging.getLogger("detectron2.data.detection_utils")
    tfm_gens = []
    tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
    if is_train:
        tfm_gens.append(T.RandomContrast(0.5, 1.5))
        tfm_gens.append(T.RandomBrightness(0.5, 1.5))
        tfm_gens.append(T.RandomSaturation(0.5, 1.5))
        tfm_gens.append(T.RandomFlip())
        logger.info("TransformGens used in training[Updated]: " + str(tfm_gens))
    return tfm_gens
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize((512, 512)),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomRotation(angle=[30, 30]),  # degenerate [30, 30] range: always rotates by exactly 30 degrees
        T.RandomLighting(0.7),
        T.RandomFlip(prob=0.4, horizontal=False, vertical=True),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def custom_mapper(dataset_dict, size, flip_prob, min_brightness, max_brightness,
                  min_contrast, max_contrast, min_saturation, max_saturation):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = detection_utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize(size),
        T.RandomBrightness(min_brightness, max_brightness),
        T.RandomContrast(min_contrast, max_contrast),
        T.RandomSaturation(min_saturation, max_saturation),
        T.RandomFlip(prob=flip_prob, horizontal=False, vertical=True),
        T.RandomFlip(prob=flip_prob, horizontal=True, vertical=False),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        detection_utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = detection_utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = detection_utils.filter_empty_instances(instances)
    return dataset_dict
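Because this mapper takes extra parameters, it cannot be handed to `build_detection_train_loader` as-is; a minimal sketch that binds them with `functools.partial` (the parameter values here are illustrative, and `cfg` is assumed to exist):

from functools import partial
from detectron2.data import build_detection_train_loader

train_mapper = partial(
    custom_mapper,
    size=(800, 800), flip_prob=0.5,
    min_brightness=0.8, max_brightness=1.2,
    min_contrast=0.8, max_contrast=1.2,
    min_saturation=0.8, max_saturation=1.2,
)
loader = build_detection_train_loader(cfg, mapper=train_mapper)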
def __init__(self, cfg, is_train=True):
    assert cfg.MODEL.MASK_ON, 'only segmentation is targeted here'
    assert not cfg.MODEL.KEYPOINT_ON, 'keypoints are not handled'
    assert not cfg.MODEL.LOAD_PROPOSALS, 'pre-computed proposals are not supported; assumed disabled'
    self.cont_gen = None
    self.bright_gen = None
    self.sat_gen = None
    self.cutout_gen = None
    self.extent_gen = None
    self.crop_gen = None
    self.rotate_gen = None
    self.shear_gen = None
    if is_train:
        if cfg.INPUT.CONTRAST.ENABLED:
            self.cont_gen = T.RandomContrast(cfg.INPUT.CONTRAST.RANGE[0], cfg.INPUT.CONTRAST.RANGE[1])
            logging.getLogger(__name__).info('ContGen used in training.')
        if cfg.INPUT.BRIGHTNESS.ENABLED:
            self.bright_gen = T.RandomBrightness(cfg.INPUT.BRIGHTNESS.RANGE[0], cfg.INPUT.BRIGHTNESS.RANGE[1])
            logging.getLogger(__name__).info('BrightGen used in training.')
        if cfg.INPUT.SATURATION.ENABLED:
            self.sat_gen = T.RandomSaturation(cfg.INPUT.SATURATION.RANGE[0], cfg.INPUT.SATURATION.RANGE[1])
            logging.getLogger(__name__).info('SatGen used in training.')
        if cfg.INPUT.CUTOUT.ENABLED:
            self.cutout_gen = RandomCutout(cfg.INPUT.CUTOUT.NUM_HOLE_RANGE,
                                           cfg.INPUT.CUTOUT.RADIUS_RANGE,
                                           cfg.INPUT.CUTOUT.COLOR_RANGE)
            logging.getLogger(__name__).info('CutoutGen used in training.')
        if cfg.INPUT.EXTENT.ENABLED:
            self.extent_gen = T.RandomExtent(scale_range=(1, 1), shift_range=cfg.INPUT.EXTENT.SHIFT_RANGE)
            logging.getLogger(__name__).info('ExtentGen used in training.')
        if cfg.INPUT.CROP.ENABLED:
            self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
            logging.getLogger(__name__).info('CropGen used in training: ' + str(self.crop_gen))
        if cfg.INPUT.ROTATE.ENABLED:
            self.rotate_gen = T.RandomRotation(cfg.INPUT.ROTATE.ANGLE, expand=False)
            logging.getLogger(__name__).info('RotateGen used in training.')
        if cfg.INPUT.SHEAR.ENABLED:
            self.shear_gen = RandomShear(cfg.INPUT.SHEAR.ANGLE_H_RANGE, cfg.INPUT.SHEAR.ANGLE_V_RANGE)
            logging.getLogger(__name__).info('ShearGen used in training.')
    self.tfm_gens = utils.build_transform_gen(cfg, is_train)
    self.img_format = cfg.INPUT.FORMAT
    self.mask_format = cfg.INPUT.MASK_FORMAT
    self.is_train = is_train
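The CONTRAST/BRIGHTNESS/SATURATION/CUTOUT/EXTENT/ROTATE/SHEAR nodes read above are not part of detectron2's default config, so they must be registered before the config file is merged. A minimal sketch for the photometric nodes (the default values are illustrative assumptions; the remaining nodes follow the same pattern):

from detectron2.config import get_cfg, CfgNode as CN

def add_photometric_config(cfg):
    # Register the custom nodes this mapper reads; defaults are illustrative.
    for name in ["CONTRAST", "BRIGHTNESS", "SATURATION"]:
        node = CN()
        node.ENABLED = False
        node.RANGE = (0.8, 1.2)
        setattr(cfg.INPUT, name, node)

cfg = get_cfg()
add_photometric_config(cfg)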
def build_train_aug(cfg):
    augs = [
        T.ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TRAIN,
                             cfg.INPUT.MAX_SIZE_TRAIN,
                             cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING),
        T.RandomContrast(0.5, 1.5),
        T.RandomBrightness(0.5, 1.5),
        T.RandomSaturation(0.5, 1.5),
        T.RandomFlip(),
    ]
    if cfg.INPUT.CROP.ENABLED:
        augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
    return augs
def build_train_loader(cfg):
    input_size = cfg.MODEL.CLSNET.INPUT_SIZE
    return build_detection_train_loader(
        cfg,
        mapper=DatasetMapper(cfg, is_train=True, augmentations=[
            T.Resize((input_size, input_size)),
            T.RandomContrast(0.5, 1.5),
            T.RandomBrightness(0.5, 1.5),
            T.RandomSaturation(0.5, 1.5)
        ]))
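A matching test-time loader would keep only the deterministic resize; a minimal sketch, assuming `dataset_name` comes from cfg.DATASETS.TEST:

def build_test_loader(cfg, dataset_name):
    input_size = cfg.MODEL.CLSNET.INPUT_SIZE
    # No photometric jitter at test time: only the deterministic resize.
    return build_detection_test_loader(
        cfg, dataset_name,
        mapper=DatasetMapper(cfg, is_train=False,
                             augmentations=[T.Resize((input_size, input_size))]))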
def build_train_loader(cls, cfg):
    return build_detection_train_loader(
        cfg,
        mapper=DatasetMapper(cfg, is_train=True, augmentations=[
            T.RandomCrop("absolute_range", (300, 600)),
            # Sample an angle per image; the original random.randrange(0, 360)
            # fixed a single angle once, when the loader was built.
            T.RandomRotation(angle=[0, 360], sample_style="range"),
            T.RandomContrast(0.5, 1.5),
            T.RandomSaturation(0.5, 1.5)
        ]))
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        InvertColors(),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomLighting(0.7),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
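`InvertColors` is not a detectron2 built-in. One plausible implementation, sketched under the assumption of 8-bit images, subclasses `T.Augmentation` and returns a `ColorTransform`, which applies a pixel-wise op and leaves coordinates untouched:

class InvertColors(T.Augmentation):
    """Invert pixel intensities; geometry (boxes, masks) is unaffected."""

    def get_transform(self, image):
        # ColorTransform wraps a pixel-wise callable and is a no-op on coords.
        return T.ColorTransform(lambda img: 255 - img)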
def __init__(self, cfg, is_train=True):
    if cfg.INPUT.CROP.ENABLED and is_train:
        self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
        # logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen))
    else:
        self.crop_gen = None

    self.tfm_gens = [
        T.RandomBrightness(0.5, 1.6),
        T.RandomContrast(0.5, 1),
        T.RandomSaturation(0.5, 1),
        T.RandomRotation(angle=[-90, 90]),
        T.RandomFlip(horizontal=True, vertical=False),
        T.RandomCrop('relative_range', (0.4, 0.6)),
        T.Resize((640, 640)),
        # CutOut()
    ]
    # self.tfm_gens = utils.build_transform_gen(cfg, is_train)

    # fmt: off
    self.img_format     = cfg.INPUT.FORMAT
    self.mask_on        = cfg.MODEL.MASK_ON
    self.mask_format    = cfg.INPUT.MASK_FORMAT
    self.keypoint_on    = cfg.MODEL.KEYPOINT_ON
    self.load_proposals = cfg.MODEL.LOAD_PROPOSALS
    # fmt: on

    if self.keypoint_on and is_train:
        # Flip only makes sense in training
        self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
    else:
        self.keypoint_hflip_indices = None

    if self.load_proposals:
        self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
        self.proposal_topk = (
            cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
            if is_train
            else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
        )
    self.is_train = is_train
def build_transform_gen(cfg, is_train):
    """
    Create a list of :class:`TransformGen` from config.
    Now it includes resizing and flipping.

    Returns:
        list[TransformGen]
    """
    input_size = cfg.MODEL.CLSNET.INPUT_SIZE
    logger = logging.getLogger("detectron2.data.classification_utils")
    tfm_gens = []
    tfm_gens.append(T.Resize((input_size, input_size)))
    if is_train:
        tfm_gens.append(T.RandomContrast(0.5, 1.5))
        tfm_gens.append(T.RandomBrightness(0.5, 1.5))
        tfm_gens.append(T.RandomSaturation(0.5, 1.5))
        tfm_gens.append(T.RandomFlip())
        logger.info("TransformGens used in training[Updated]: " + str(tfm_gens))
    return tfm_gens
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([
        T.Resize((1920, 1080)),  # note: T.Resize expects (height, width)
        T.RandomFlip(0.1),
        T.RandomSaturation(0.9, 1.1),
        T.RandomBrightness(0.9, 1.1),
        T.RandomContrast(0.9, 1.1)
    ], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def build_aug_transforms(cfg: detectron2.config.CfgNode,
                         flip_horiz: bool = True,
                         flip_vert: bool = False,
                         max_rotate: int = 10,
                         brightness_limits: Tuple[float, float] = (0.8, 1.4),
                         contrast_limits: Tuple[float, float] = (0.8, 1.4),
                         saturation_limits: Tuple[float, float] = (0.8, 1.4),
                         p_lighting: float = 0.75) -> List[T.Augmentation]:
    "Build a list of detectron2 augmentations"
    # Note: the limits are float pairs and a plain list is returned (the original
    # annotated them as Tuple[int, int] and AugmentationList).
    augs = []
    augs.append(T.ResizeShortestEdge(cfg.INPUT.MIN_SIZE_TRAIN,
                                     cfg.INPUT.MAX_SIZE_TRAIN,
                                     cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING))
    if flip_horiz:
        augs.append(T.RandomFlip(prob=0.5, horizontal=True, vertical=False))
    if flip_vert:
        augs.append(T.RandomFlip(prob=0.5, horizontal=False, vertical=True))
    if max_rotate:
        augs.append(T.RandomRotation(angle=[-max_rotate, max_rotate], expand=False))
    if brightness_limits:
        augs.append(T.RandomApply(prob=p_lighting, tfm_or_aug=T.RandomBrightness(*brightness_limits)))
    if contrast_limits:
        augs.append(T.RandomApply(prob=p_lighting, tfm_or_aug=T.RandomContrast(*contrast_limits)))
    if saturation_limits:
        augs.append(T.RandomApply(prob=p_lighting, tfm_or_aug=T.RandomSaturation(*saturation_limits)))
    return augs
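The returned list can also be applied outside a mapper through the newer AugInput API; a minimal sketch, assuming `cfg` and an HxWxC uint8 array `img` already exist:

import numpy as np

aug = T.AugmentationList(build_aug_transforms(cfg))
aug_input = T.AugInput(img)
tfms = aug(aug_input)        # applies the augmentations, mutating aug_input in place
augmented = aug_input.image
# The returned TransformList replays the same geometry on boxes:
boxes_out = tfms.apply_box(np.array([[10, 10, 100, 100]], dtype=np.float32))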
def build_augmentation(cfg, is_train):
    """
    With the option to disable hflip.

    Returns:
        list[Augmentation]
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
        sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        sample_style = "choice"
    if sample_style == "range":
        assert (
            len(min_size) == 2
        ), "more than 2 ({}) min_size(s) are provided for ranges".format(len(min_size))
    logger = logging.getLogger(__name__)
    augmentation = []
    augmentation.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
    if is_train:
        if cfg.INPUT.HFLIP_TRAIN:
            augmentation.append(T.RandomFlip())
        if cfg.INPUT.BRIGHTNESS_TRAIN:
            augmentation.append(T.RandomBrightness(0.9, 1.1))
        if cfg.INPUT.CONTRAST_TRAIN:
            augmentation.append(T.RandomContrast(0.9, 1.1))
        if cfg.INPUT.SATURATION_TRAIN:
            augmentation.append(T.RandomSaturation(0.9, 1.1))
        logger.info("Augmentations used in training: " + str(augmentation))
    return augmentation
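The HFLIP_TRAIN/BRIGHTNESS_TRAIN/CONTRAST_TRAIN/SATURATION_TRAIN switches are custom keys, so they have to be added before a YAML that sets them is merged; a minimal sketch with illustrative defaults and a hypothetical config file name:

from detectron2.config import get_cfg

cfg = get_cfg()
# Custom boolean switches read by build_augmentation (defaults are illustrative).
cfg.INPUT.HFLIP_TRAIN = True
cfg.INPUT.BRIGHTNESS_TRAIN = False
cfg.INPUT.CONTRAST_TRAIN = False
cfg.INPUT.SATURATION_TRAIN = False
cfg.merge_from_file("my_config.yaml")  # hypothetical YAML that may override them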
def __call__(self, dataset_dict):
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    utils.check_image_size(dataset_dict, image)

    data_transformations = []
    if self.is_train:
        # Crop
        if self.crop:
            crop_gen = T.RandomCrop(self.crop_type, self.crop_size)
            data_transformations.append(crop_gen)
            print('crop')
        # Horizontal flip
        if self.flip:
            flip_gen = T.RandomFlip()
            data_transformations.append(flip_gen)
        # if self.rotation:
        #     rotation_gen = T.RandomRotation([0, 90])
        #     data_transformations.append(rotation_gen)
        if self.saturation:
            saturation_gen = T.RandomSaturation(0.5, 1.5)
            data_transformations.append(saturation_gen)
    print(str(dataset_dict["file_name"]))

    image, transforms = T.apply_transform_gens(data_transformations, image)

    # Debug: dump the augmented image to disk for inspection
    print('\n\n -------------------PRINTING IMAGE---------------------- \n\n')
    img_name = dataset_dict["file_name"][len(dataset_dict["file_name"]) - 15:len(dataset_dict["file_name"]) - 4]
    img_name = '/home/grupo01/images_augmented/' + img_name + '_augmented.png'
    print(len(dataset_dict["file_name"]))
    print(img_name)
    cv2.imwrite(img_name, image)

    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    image_shape = image.shape[:2]

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape, keypoint_hflip_indices=None)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped.
        # Note: the original tested self.crop_gen, which is never set here;
        # crop_gen above is a local variable, so test the self.crop flag instead.
        if self.crop and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
from detectron2.config import get_cfg
from detectron2.data import build_detection_train_loader, build_detection_test_loader, DatasetMapper, detection_utils
from detectron2.data.samplers import GroupedBatchSampler, TrainingSampler, InferenceSampler, RepeatFactorTrainingSampler
import detectron2.data.transforms as T

augmentations = [
    T.RandomFlip(prob=0.25, horizontal=True, vertical=False),
    T.RandomFlip(prob=0.25, horizontal=False, vertical=True),
    T.RandomApply(tfm_or_aug=T.RandomBrightness(intensity_min=0.75, intensity_max=1.25), prob=0.25),
    T.RandomApply(tfm_or_aug=T.RandomContrast(intensity_min=0.75, intensity_max=1.25), prob=0.25),
    T.RandomApply(tfm_or_aug=T.RandomCrop(crop_type="relative_range", crop_size=(0.75, 0.75)), prob=0.25),
    T.RandomApply(tfm_or_aug=T.RandomSaturation(intensity_min=0.75, intensity_max=1.25), prob=0.25),
    T.RandomApply(tfm_or_aug=T.RandomRotation(angle=[-30, 30], expand=True, center=None, sample_style="range", interp=None), prob=0.25)
]

def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = detection_utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens(augmentations, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # The original snippet was cut off here; the remainder is completed to match
    # the pattern shared by the other mappers in this collection.
    annos = [
        detection_utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = detection_utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = detection_utils.filter_empty_instances(instances)
    return dataset_dict
def train_mapper(self, dataset_dict):  # ,dataset_used):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations

    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below

    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    ## Crop to bounding box ##
    # Crop for all but comparison
    if self.dataset_used != "comparison" and self.is_crop_to_bbox:
        # Get the bounding box
        bbox = dataset_dict["annotations"][0]["bbox"]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin
        h = ymax - ymin

        # Nudge the crop to be slightly outside of the bounding box
        nudgedXMin = xmin - 15
        nudgedYMin = ymin - 15
        nudgedW = w + 50
        nudgedH = h + 50

        # If the crop goes outside of the image dimensions, fix this
        imageHeight = image.shape[0]
        imageWidth = image.shape[1]
        if nudgedXMin < 0:
            nudgedXMin = 0
        if nudgedYMin < 0:
            nudgedYMin = 0
        if nudgedXMin + nudgedW >= imageWidth:
            nudgedW = imageWidth - 1
        if nudgedYMin + nudgedH >= imageHeight:
            nudgedH = imageHeight - 1

        # Apply the crop
        cropT = T.CropTransform(nudgedXMin, nudgedYMin, nudgedW, nudgedH)
        image = cropT.apply_image(image)
        transforms = T.TransformList([cropT])
    # Comparison has a bbox the size of the image, so don't bother cropping
    else:
        # scaled between 0.5 and 1; shifted up to 0.5 in each dimension
        # randomExtent = T.RandomExtent((0.5, 1), (0.5, 0.5))
        # transforms = T.TransformList([randomExtent])
        transforms = T.TransformList([])

    # Applying the crop to the bbox is handled in annotations_to_instances,
    # so long as cropT is part of the transform list
    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"] = image.shape[1]

    ## Scale the image size ##
    thresholdDimension = self.threshold_dimension
    currWidth = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be a multiple of 32
    if self.modelLink in ["VGG19_BN", "YOLOV3"]:
        vgg_im_size = thresholdDimension
        # Apply the scaling transform; scaling the bbox is handled in
        # annotations_to_instances via the transform list
        scaleT = T.ScaleTransform(h=currHeight, w=currWidth, new_h=vgg_im_size, new_w=vgg_im_size, interp="nearest")
        image = scaleT.apply_image(image.copy())
        transforms = transforms + scaleT
        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"] = image.shape[1]
    else:
        # Downscale only at this threshold
        if currHeight > thresholdDimension or currWidth > thresholdDimension:
            # Scale the longest dimension to the threshold, the other in proportion
            if currHeight > currWidth:
                myNewH = thresholdDimension
                ratio = currHeight / float(myNewH)
                myNewW = int(round(currWidth / float(ratio)))
            else:
                myNewW = thresholdDimension
                ratio = currWidth / float(myNewW)
                myNewH = int(round(currHeight / float(ratio)))

            # Apply the scaling transform
            if self.fixed_wh:
                scaleT = T.ScaleTransform(h=currHeight, w=currWidth, new_h=myNewH, new_w=myNewW, interp="nearest")
            else:
                scaleT = T.ScaleTransform(h=currHeight, w=currWidth, new_h=myNewW, new_w=myNewH, interp="nearest")
            image = scaleT.apply_image(image.copy())
            transforms = transforms + scaleT
            # Set the dimensions
            dataset_dict["height"] = image.shape[0]
            dataset_dict["width"] = image.shape[1]

    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    ## Apply other transforms ##
    # Standard random image mods
    if self.dataset_used != "comparison":
        image, tfms = T.apply_transform_gens(
            [T.RandomBrightness(0.4, 1.6), T.RandomContrast(0.4, 1.6),
             T.RandomSaturation(0.5, 1.5), T.RandomLighting(1.2)], image)
    # More extreme for the comparison set
    else:
        image, tfms = T.apply_transform_gens(
            [T.RandomBrightness(0.2, 1.8), T.RandomContrast(0.2, 1.8),
             T.RandomSaturation(0.3, 1.7), T.RandomLighting(1.5)], image)
    transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    PILImage = Image.fromarray(image)
    # Standard affine
    if self.dataset_used != "comparison":
        shear_range = 8
        angle_range = 30
    # More extreme random affine for comparison
    else:
        shear_range = 50
        angle_range = 30
    rand_shear = (np.random.uniform(-shear_range, shear_range),
                  np.random.uniform(-shear_range, shear_range))
    rand_angle = np.random.uniform(-angle_range, angle_range)
    RandAffT = RandomAffineTransform(PILImage.size, shear=rand_shear, angle=rand_angle)
    # Apply affine to the image and append to the transform list
    image = RandAffT.apply_image(image.copy())
    transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Sanity checks on the augmented image
    if image.shape[0] == 0:
        raise ValueError("image shape[0] is 0! {} {}".format(image.shape, dataset_dict["file_name"]))
    if image.shape[1] == 0:
        raise ValueError("image shape[1] is 0! {} {}".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    # Do the remainder of the dictionary
    classID = dataset_dict["annotations"][0]["category_id"]
    dataset_dict["classID"] = classID

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms
    return dataset_dict
# Imports added so the snippet is self-contained: torch, DefaultTrainer,
# build_detection_train_loader, and the transforms module it uses below.
import torch
from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.data import transforms
from detectron2.engine import DefaultTrainer
from detectron2.utils.visualizer import Visualizer
import os
import cv2
import random
import numpy as np

print(torch.__version__)

import Params as P

USE_SAVED_MODEL = True
SHOW_INPUTS = False

augs = transforms.AugmentationList([
    transforms.RandomBrightness(0.5, 1.5),
    transforms.RandomContrast(0.5, 1.5),
    transforms.RandomSaturation(0.5, 1.5),
    transforms.RandomFlip(prob=0.5),
    transforms.RandomExtent(scale_range=(0.1, 3), shift_range=(0.5, 0.5)),
    transforms.Resize(P.CNN_INPUT_SHAPE)
])

class Trainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        mapper = DatasetMapper(cfg, is_train=True, augmentations=augs)
        return build_detection_train_loader(cfg, mapper=mapper)

for d in ["train"]:  # , "valid"
    with open(P.DATASET_DIR + d + "/labels.json", 'r') as fp:
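If SHOW_INPUTS is meant to preview augmented batches, a minimal sketch of such a helper (the names `cfg` and `metadata` are assumed to be defined elsewhere in the script):

def show_inputs(cfg, metadata):
    # Pull batches from the training loader and display them one image at a time.
    loader = Trainer.build_train_loader(cfg)
    for batch in loader:
        for item in batch:
            img = item["image"].numpy().transpose(1, 2, 0)  # CHW -> HWC
            vis = Visualizer(img[:, :, ::-1], metadata=metadata)  # BGR -> RGB
            out = vis.overlay_instances(boxes=item["instances"].gt_boxes)
            cv2.imshow("augmented input", out.get_image()[:, :, ::-1])
            if cv2.waitKey(0) == 27:  # Esc to stop
                return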