def __init__(self, is_train: bool, to_pytorch: bool):
    if is_train:
        self._aug = Compose([
            OneOf([
                Compose([
                    SmallestMaxSize(max_size=min(data_height, data_width) * 1.1, p=1),
                    RandomCrop(height=data_height, width=data_width, p=1)
                ], p=1),
                Resize(height=data_height, width=data_width, p=1)
            ], p=1),
            GaussNoise(p=0.5),
            RandomGamma(p=0.5),
            RandomBrightnessContrast(p=0.5),
            HorizontalFlip(p=0.5)
        ], p=1)
    else:
        self._aug = Compose([
            SmallestMaxSize(max_size=min(data_height, data_width), p=1),
            CenterCrop(height=data_height, width=data_width, p=1)
        ], p=1)
    self._need_to_pytorch = to_pytorch
def _create_dataset(is_train: bool, augmented: bool, to_pytorch: bool = True,
                    indices_path: str = None) -> BasicDataset:
    def vertical2quad(force_apply=False, **kwargs):
        image, mask = kwargs['image'], kwargs['mask']
        max_size = np.max(image.shape)
        min_size = np.min([image.shape[0], image.shape[1]])
        image_tmp = np.ones((max_size, max_size, image.shape[2]), dtype=np.uint8)
        mask_tmp = np.zeros((max_size, max_size), dtype=np.uint8)
        pos = (max_size - min_size) // 2
        image_tmp[:, pos:pos + min_size, :] = image
        mask_tmp[:, pos:pos + min_size] = mask
        return {'image': image_tmp, 'mask': mask_tmp}

    regular_preprocess = OneOf([
        Compose([
            SmallestMaxSize(max_size=DATA_HEIGHT),
            RandomCrop(height=DATA_HEIGHT, width=DATA_WIDTH)
        ]),
        Compose([
            SmallestMaxSize(max_size=int(DATA_HEIGHT * 1.2)),
            RandomCrop(height=DATA_HEIGHT, width=DATA_WIDTH)
        ])
    ], p=1)

    vertical_img_preprocess = Compose([vertical2quad, regular_preprocess])

    if augmented:
        datasets = [
            # AugmentedDataset(ClothingCoParsingDataset())
            #     .add_aug(SegmentationAugmentations(is_train, to_pytorch, vertical_img_preprocess).augmentate),
            # AugmentedDataset(AISegmentDataset()).add_aug(SegmentationAugmentations(is_train, to_pytorch, regular_preprocess).augmentate),
            # AugmentedDataset(InstanceSegmentationDataset(SuperviselyPersonDataset())).add_aug(SegmentationAugmentations(is_train, to_pytorch, regular_preprocess).augmentate),
            # AugmentedDataset(PicsartDataset()).add_aug(SegmentationAugmentations(is_train, to_pytorch, regular_preprocess).augmentate),
            # AugmentedDataset(CHIP()).add_aug(SegmentationAugmentations(is_train, to_pytorch, regular_preprocess).augmentate),
            AugmentedDataset(MHPV2()).add_aug(
                SegmentationAugmentations(is_train, to_pytorch, regular_preprocess).augmentate),
        ]
    else:
        datasets = [
            ClothingCoParsingDataset(),
            AISegmentDataset(),
            InstanceSegmentationDataset(SuperviselyPersonDataset()),
            PicsartDataset(),
            CHIP(),
            MHPV2(),
        ]

    dataset = DatasetsContainer(datasets)
    if indices_path is not None:
        dataset.load_indices(indices_path).remove_unused_data()
    return dataset
def __init__(self):
    # Variables to hold the description of the experiment
    self.description = "Training configuration file for the RGB version of the ResNet50 network."

    # System dependent variables
    self._workers = 10
    self._multiprocessing = True

    # Variables for comet.ml
    self._project_name = "jpeg-deep"
    self._workspace = "classification_resnet50"

    # Network variables
    self._weights = None
    self._network = ResNet50()

    # Training variables
    self._epochs = 90
    self._batch_size = 32
    self._steps_per_epoch = 1281167 // self._batch_size
    self._validation_steps = 50000 // self._batch_size
    self.optimizer_parameters = {"lr": 0.0125, "momentum": 0.9}
    self._optimizer = SGD(**self.optimizer_parameters)
    self._loss = categorical_crossentropy
    self._metrics = ['accuracy', 'top_k_categorical_accuracy']

    self.train_directory = join(environ["DATASET_PATH_TRAIN"], "train")
    self.validation_directory = join(environ["DATASET_PATH_VAL"], "validation")
    self.test_directory = join(environ["DATASET_PATH_TEST"], "validation")
    self.index_file = "data/imagenet_class_index.json"

    # Defining the transformations that will be applied to the inputs.
    self.train_transformations = [
        SmallestMaxSize(256),
        RandomCrop(224, 224),
        HorizontalFlip()
    ]

    self.validation_transformations = [
        SmallestMaxSize(256),
        CenterCrop(224, 224)
    ]

    self.test_transformations = [SmallestMaxSize(256)]

    # Keras stuff
    self._callbacks = []

    self._train_generator = None
    self._validation_generator = None
    self._test_generator = None

    # Stuff for display
    self._displayer = ImageNetDisplayer(self.index_file)
def get_train_transforms():
    return Compose([
        # RandomResizedCrop(CFG['img_size'], CFG['img_size']),
        SmallestMaxSize(max_size=512),
        # OneOf([RandomCrop(500, 500, p=0.4),
        #        CenterCrop(500, 500, p=0.5),
        #        RandomResizedCrop(512, 512, p=0.1)]),
        RandomCrop(320, 320),
        OneOf([Transpose(p=0.5),
               HorizontalFlip(p=0.5),
               VerticalFlip(p=0.5)]),
        # ShiftScaleRotate(p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                           val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                 contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                  max_pixel_value=255.0, p=1.0),
        # Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], max_pixel_value=255.0, p=1.0),
        # CoarseDropout(p=0.5),
        ToTensorV2(p=1.0),
    ], p=1.)
def get_train_transform(smallest_max_size: int, size: int):
    return Compose([
        SmallestMaxSize(smallest_max_size),
        RandomScale(scale_limit=0.125),
        # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.),
        # ShiftScaleRotate(
        #     shift_limit=0.0625, scale_limit=0.1, rotate_limit=30,
        #     border_mode=cv2.BORDER_REFLECT_101, p=1.),
        Rotate(limit=20, border_mode=cv2.BORDER_REFLECT_101, p=1.),
        OneOf([
            RandomCrop(size, size, p=0.9),
            CenterCrop(size, size, p=0.1),
        ], p=1.),
        HorizontalFlip(p=0.5),
        RandomContrast(limit=0.2, p=0.5),
        RandomGamma(gamma_limit=(80, 120), p=0.5),
        RandomBrightness(limit=0.2, p=0.5),
        # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20,
        #                    val_shift_limit=10, p=1.),
        # OneOf([
        #     OpticalDistortion(p=0.3),
        #     GridDistortion(p=0.1),
        #     IAAPiecewiseAffine(p=0.3),
        # ], p=0.2),
        # OneOf([
        #     IAAAdditiveGaussianNoise(
        #         loc=0, scale=(1., 6.75), per_channel=False, p=0.3),
        #     GaussNoise(var_limit=(5.0, 20.0), p=0.6),
        # ], p=0.5),
        # Cutout(num_holes=4, max_h_size=30, max_w_size=50, p=0.75),
        # JpegCompression(quality_lower=50, quality_upper=100, p=0.5)
    ])
def __init__(self, weights_path):
    model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
    for module in model.modules():
        if isinstance(module, MBConvBlock):
            if module._block_args.expand_ratio != 1:
                expand_conv = module._expand_conv
                seq_expand_conv = SeqExpandConv(expand_conv.in_channels,
                                                expand_conv.out_channels,
                                                VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH)
                module._expand_conv = seq_expand_conv
    self.model = model.cuda().eval()

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    self.transform = Compose([
        SmallestMaxSize(VIDEO_MODEL_MIN_SIZE),
        CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
        normalize,
        ToTensor()
    ])

    state = torch.load(weights_path, map_location=lambda storage, loc: storage)
    state = {key: value.float() for key, value in state.items()}
    self.model.load_state_dict(state)
def resize_transforms(image_size=224):
    pre_size = int(image_size * 1.5)

    random_crop = Compose([
        SmallestMaxSize(pre_size, p=1),
        RandomCrop(image_size, image_size, p=1)
    ])

    resize = Compose([
        Resize(image_size, image_size, p=1)
    ])

    random_crop_big = Compose([
        LongestMaxSize(pre_size, p=1),
        RandomCrop(image_size, image_size, p=1)
    ])

    # Converts the image to a square of size image_size x image_size
    result = [
        OneOf([
            random_crop,
            resize,
            random_crop_big
        ], p=1)
    ]

    return result
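# Usage sketch (illustrative, not part of the original source): the list returned by
# resize_transforms() is meant to be spliced into a larger Compose pipeline; the extra
# transforms and the dummy input below are assumptions for demonstration only.
import numpy as np
from albumentations import Compose, HorizontalFlip, Normalize

pipeline = Compose(resize_transforms(image_size=224) + [HorizontalFlip(p=0.5), Normalize()])
dummy_image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
augmented = pipeline(image=dummy_image)["image"]  # 224x224 for this input, whichever OneOf branch fires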
def __init__(self, first_weights_path, second_weights_path):
    first_model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
    self.first_model = first_model.cuda().eval()

    second_model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
    self.second_model = second_model.cuda().eval()

    first_normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    self.first_transform = Compose([
        SmallestMaxSize(VIDEO_MODEL_CROP_WIDTH),
        PadIfNeeded(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
        CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
        first_normalize,
        ToTensor()
    ])

    second_normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    self.second_transform = Compose([
        SmallestMaxSize(VIDEO_MODEL_MIN_SIZE),
        CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
        second_normalize,
        ToTensor()
    ])

    first_state = torch.load(first_weights_path, map_location=lambda storage, loc: storage)
    first_state = {key: value.float() for key, value in first_state.items()}
    self.first_model.load_state_dict(first_state)

    second_state = torch.load(second_weights_path, map_location=lambda storage, loc: storage)
    second_state = {key: value.float() for key, value in second_state.items()}
    self.second_model.load_state_dict(second_state)
def get_test_transform(smallest_max_size: int, size: int):
    return Compose([
        # RandomScale(scale_limit=0.125),
        SmallestMaxSize(smallest_max_size),
        # PadIfNeeded(256, 256, border_mode=cv2.BORDER_REFLECT_101, value=0, p=1.),
        # OneOf([
        #     RandomCrop(224, 224, p=0.9),
        #     CenterCrop(224, 224, p=0.1),
        # ], p=1.),
        CenterCrop(size, size, p=1.)
        # HorizontalFlip(p=0.5),
    ])
def get_inference_transforms():
    return Compose([
        SmallestMaxSize(max_size=512),
        Resize(320, 320),
        # RandomCrop(320, 320),
        # Transpose(p=0.5),
        # HorizontalFlip(p=0.5),
        # VerticalFlip(p=0.5),
        # HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        # RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                  max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.0)
def test(cfg):
    base_size = 576
    normalization = ImageNormalization(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    interpolation = cv2.INTER_LINEAR

    test_dataset = CaptchaDataset(
        input_dir=cfg.val_dataset_path,
        normalization=normalization,
        input_file='test.txt',
        aug=[
            SmallestMaxSize(max_size=base_size, always_apply=True, interpolation=interpolation)
        ],
        default_transform=False,
        original_masks=True,
    )
    num_classes = test_dataset.num_classes()
    num_labels = test_dataset.num_labels()
    net = init_model(num_classes, num_labels)

    batch_size = 1
    if len(cfg.gpu_ids) > 0:
        batch_size = batch_size * len(cfg.gpu_ids)

    tester = CaptchaTester(cfg,
                           net=net,
                           test_dataset=test_dataset,
                           image_normalization=normalization,
                           batch_size=batch_size,
                           n_display=64)

    if cfg.test_cmd == 'metrics':
        results = tester.evaluate(use_flips=True)
        output_str = ', '.join([f'{k}: {results[k]:.4f}' for k in results])
        logger.info(output_str)
    elif cfg.test_cmd == 'visualize':
        raise NotImplementedError
    else:
        assert False, f'unknown test command {cfg.test_cmd}'
def __init__(self):
    # Variables to hold the description of the experiment
    self.description = "Training configuration file for the VGG deconvolution network."

    # System dependent variables
    self._workers = 5
    self._multiprocessing = True

    # Variables for comet.ml
    self._project_name = "jpeg_deep"
    self._workspace = "classification_dct_deconv"

    # Network variables
    self._weights = None
    self._network = VGG16_dct_deconv()

    # Training variables
    self._epochs = 180
    self._batch_size = 64
    self._steps_per_epoch = 1281167 // self._batch_size
    self._validation_steps = 50000 // self._batch_size
    self.optimizer_parameters = {"lr": 0.0025, "momentum": 0.9}
    self._optimizer = SGD(**self.optimizer_parameters)
    self._loss = categorical_crossentropy
    self._metrics = ['accuracy', 'top_k_categorical_accuracy']

    self.train_directory = join(environ["DATASET_PATH_TRAIN"], "train")
    self.validation_directory = join(environ["DATASET_PATH_VAL"], "validation")
    self.test_directory = join(environ["DATASET_PATH_TEST"], "validation")
    self.index_file = "data/imagenet_class_index.json"

    # Defining the transformations that will be applied to the inputs.
    self.train_transformations = [
        SmallestMaxSize(256),
        RandomCrop(224, 224),
        HorizontalFlip()
    ]

    self.validation_transformations = [
        SmallestMaxSize(256),
        CenterCrop(224, 224)
    ]

    self.test_transformations = [SmallestMaxSize(256)]

    # Keras stuff
    self.reduce_lr_on_plateau = ReduceLROnPlateau(patience=5, verbose=1)
    self.terminate_on_nan = TerminateOnNaN()
    self.early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=11)

    self._callbacks = [self.reduce_lr_on_plateau,
                       self.terminate_on_nan,
                       self.early_stopping]

    # Creating the training and validation generators
    self._train_generator = None
    self._validation_generator = None
    self._test_generator = None

    self._displayer = ImageNetDisplayer(self.index_file)
def get_val_augs():
    return Compose([
        SmallestMaxSize(resize_size),
        CenterCrop(*img_size),
    ])
def get_train_augs():
    return Compose([
        SmallestMaxSize(resize_size),
        RandomCrop(*img_size),
        HorizontalFlip()
    ])
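# Usage sketch (assumption): `resize_size` and `img_size` are module-level settings used
# by get_train_augs()/get_val_augs() above; the concrete values and the dummy image here
# are made up for illustration.
import numpy as np

resize_size = 256
img_size = (224, 224)

sample = np.random.randint(0, 256, (500, 375, 3), dtype=np.uint8)
train_out = get_train_augs()(image=sample)["image"]  # random 224x224 crop, possibly flipped
val_out = get_val_augs()(image=sample)["image"]      # deterministic 224x224 center crop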
def main():
    # --------- 1. get image path and name ---------
    model_name = 'u2netp'  # u2netp u2net

    data_dir = '/data2/wangjiajie/datasets/scene_segment1023/u2data/'
    image_dir = os.path.join(data_dir, 'test_imgs')
    prediction_dir = os.path.join('./outputs/', model_name + '/')
    if not os.path.exists(prediction_dir):
        os.makedirs(prediction_dir, exist_ok=True)
    # tra_label_dir = 'test_lbls/'

    image_ext = '.jpg'
    # label_ext = '.jpg'  # '.png'

    model_dir = os.path.join(os.getcwd(), 'saved_models', model_name, model_name + '.pth')

    img_name_list = glob.glob(image_dir + os.sep + '*')
    print(f'test img numbers are: {len(img_name_list)}')

    # --------- 2. dataloader ---------
    test_salobj_dataset = SalObjDataset(img_name_list=img_name_list,
                                        lbl_name_list=[],
                                        transform=Compose([
                                            SmallestMaxSize(max_size=320),
                                        ]))
    test_salobj_dataloader = DataLoader(test_salobj_dataset,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)

    # --------- 3. model define ---------
    if model_name == 'u2net':
        print("...load U2NET---173.6 MB")
        net = U2NET(3, 1)
    elif model_name == 'u2netp':
        print("...load U2NEP---4.7 MB")
        net = U2NETP(3, 1)

    # net.load_state_dict(torch.load(model_dir))
    checkpoint = torch.load(model_dir)
    d = collections.OrderedDict()
    for key, value in checkpoint.items():
        tmp = key[7:]
        d[tmp] = value
    net.load_state_dict(d)

    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # --------- 4. inference for each image ---------
    for i_test, data_test in enumerate(test_salobj_dataloader):
        print("inferencing:", img_name_list[i_test].split(os.sep)[-1])

        inputs_test = data_test['image']
        inputs_test = inputs_test.type(torch.FloatTensor)

        if torch.cuda.is_available():
            inputs_test = Variable(inputs_test.cuda())
        else:
            inputs_test = Variable(inputs_test)

        d1, d2, d3, d4, d5, d6, d7 = net(inputs_test)

        # normalization
        pred = 1.0 - d1[:, 0, :, :]
        pred = normPRED(pred)

        # save results to test_results folder
        save_output(img_name_list[i_test], pred, prediction_dir)

        del d1, d2, d3, d4, d5, d6, d7
def transform(image, mask, image_name, mask_name):
    x, y = image, mask

    # With probability 0.5 the original image/mask pair is returned unchanged.
    rand = random.uniform(0, 1)
    if rand > 0.5:
        imagedict = {f"{image_name}": x}
        masksdict = {f"{mask_name}": y}
        return imagedict, masksdict

    mask_density = np.count_nonzero(y)

    # The original pair is always kept; augmented copies are appended after it.
    images_aug = [x]
    masks_aug = [y]

    ## Augmenting only images with Gloms
    if mask_density > 0:
        try:
            h, w, c = x.shape
        except Exception:
            image = image[:-1]
            x, y = image, mask
            h, w, c = x.shape

        augmentations = [
            Blur(p=1, blur_limit=3),
            # CenterCrop(p=1, height=32, width=32),
            HorizontalFlip(p=1),
            VerticalFlip(p=1),
            # Normalize(p=1),
            Transpose(p=1),
            RandomGamma(p=1),
            OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
            GridDistortion(p=1),
            RandomGridShuffle(p=1),
            HueSaturationValue(p=1),
            # PadIfNeeded(p=1),
            RGBShift(p=1),
            RandomBrightness(p=1),
            RandomContrast(p=1),
            # MotionBlur(p=1),
            MedianBlur(p=1, blur_limit=5),
            GaussianBlur(p=1, blur_limit=3),
            GaussNoise(p=1),
            GlassBlur(p=1),
            CLAHE(clip_limit=1.0, tile_grid_size=(8, 8), always_apply=False, p=1),
            ChannelShuffle(p=1),
            ToGray(p=1),
            ToSepia(p=1),
            JpegCompression(p=1),
            ImageCompression(p=1),
            Cutout(p=1),
            # CoarseDropout(p=1, max_holes=8, max_height=32, max_width=32),
            # ToFloat(p=1),
            FromFloat(p=1),
            RandomBrightnessContrast(p=1),
            RandomSnow(p=1),
            RandomRain(p=1),
            RandomFog(p=1),
            RandomSunFlare(p=1),
            RandomShadow(p=1),
            Lambda(p=1),
            ChannelDropout(p=1),
            ISONoise(p=1),
            Solarize(p=1),
            Equalize(p=1),
            Posterize(p=1),
            Downscale(p=1),
            MultiplicativeNoise(p=1),
            FancyPCA(p=1),
            # MaskDropout(p=1),
            GridDropout(p=1),
            ColorJitter(p=1),
            ElasticTransform(p=1, alpha=120, sigma=512 * 0.05, alpha_affine=512 * 0.03),
            CropNonEmptyMaskIfExists(p=1, height=22, width=32),
            IAAAffine(p=1),
            # IAACropAndPad(p=1),
            IAAFliplr(p=1),
            IAAFlipud(p=1),
            IAAPerspective(p=1),
            IAAPiecewiseAffine(p=1),
            LongestMaxSize(p=1),
            NoOp(p=1),
            # RandomCrop(p=1, height=22, width=22),
            # RandomResizedCrop(p=1, height=22, width=20),
            RandomScale(p=1),
            # RandomSizedCrop(p=1, height=22, width=20, min_max_height=[32, 32]),
            # Resize(p=1, height=22, width=20),
            Rotate(p=1),
            ShiftScaleRotate(p=1),
            SmallestMaxSize(p=1),
        ]

        # Apply each augmentation once to the original pair and collect the results.
        for aug in augmentations:
            augmented = aug(image=x, mask=y)
            images_aug.append(augmented['image'])
            masks_aug.append(augmented['mask'])

    # Build parallel name lists: the first entry keeps the original name, the rest get a
    # per-augmentation suffix from `smalllist` (defined elsewhere in the module).
    images_name = []
    masks_name = []
    for idx in range(len(images_aug)):
        if idx == 0:
            images_name.append(f"{image_name}")
            masks_name.append(f"{mask_name}")
        else:
            images_name.append(f"{image_name}_{smalllist[idx - 1]}")
            masks_name.append(f"{mask_name}_{smalllist[idx - 1]}")

    imagedict = dict(zip(images_name, images_aug))
    masksdict = dict(zip(masks_name, masks_aug))
    return imagedict, masksdict
    print('Bad arguments passed', file=sys.stderr)
    parser.print_help(file=sys.stderr)
    exit(2)

args = parser.parse_args()

if (args.image is None and args.web_cam is None) or \
        (args.image is not None and args.web_cam is not None):
    print("Please define one of the options: -i or -w")
    parser.print_help(file=sys.stderr)
    sys.exit(1)

vis = ColormapVisualizer([0.5, 0.5])
seg = Segmentation(accuracy_lvl=Segmentation.Level.LEVEL_2)
seg.set_device(args.device)

data_transform = Compose([SmallestMaxSize(max_size=512, always_apply=True),
                          CenterCrop(height=512, width=512, always_apply=True)], p=1)

if args.image is not None:
    image = cv2.cvtColor(cv2.imread(args.image), cv2.COLOR_BGR2RGB)
    image = data_transform(image=image)['image']
    cv2.imwrite('result.jpg', seg.process(image)[0])
elif args.web_cam is not None:
    title = "Person segmentation example"
    cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL)
    cap = cv2.VideoCapture(0)
    while cv2.waitKey(1) & 0xFF != ord('q'):
        ret, frame = cap.read()
def albumentations_transforms(
    crop_size,
    shorter_side,
    low_scale,
    high_scale,
    img_mean,
    img_std,
    img_scale,
    ignore_label,
    num_stages,
    dataset_type,
):
    from albumentations import (
        Normalize,
        HorizontalFlip,
        RandomRotate90,            # my addition
        RandomBrightnessContrast,  # my addition
        CLAHE,                     # my addition
        RandomGamma,               # my addition
        ElasticTransform,          # my addition
        GridDistortion,            # my addition
        MotionBlur,                # my addition
        RandomCrop,
        PadIfNeeded,
        RandomScale,
        LongestMaxSize,
        SmallestMaxSize,
        OneOf,
    )
    from albumentations.pytorch import ToTensorV2 as ToTensor
    from densetorch.data import albumentations2densetorch

    if dataset_type == "densetorch":
        wrapper = albumentations2densetorch
    elif dataset_type == "torchvision":
        wrapper = albumentations2torchvision
    else:
        raise ValueError(f"Unknown dataset type: {dataset_type}")

    common_transformations = [
        Normalize(max_pixel_value=1.0 / img_scale, mean=img_mean, std=img_std),
        ToTensor(),
    ]
    train_transforms = []
    for stage in range(num_stages):
        train_transforms.append(
            wrapper([
                ChangeBackground("../backgrounds", p=0.5),  # my addition
                MotionBlur(p=0.5),
                OneOf([
                    RandomScale(scale_limit=(low_scale[stage], high_scale[stage])),
                    LongestMaxSize(max_size=shorter_side[stage]),
                    SmallestMaxSize(max_size=shorter_side[stage]),
                ]),
                PadIfNeeded(
                    min_height=crop_size[stage],
                    min_width=crop_size[stage],
                    border_mode=cv2.BORDER_CONSTANT,
                    value=np.array(img_mean) / img_scale,
                    mask_value=ignore_label,
                ),
                HorizontalFlip(p=0.5),
                RandomRotate90(p=0.5),
                RandomBrightnessContrast(p=0.8),  # only applies to images, not masks
                RandomGamma(p=0.8),               # only applies to images
                OneOf([
                    ElasticTransform(p=0.5, alpha=120, sigma=500 * 0.05,
                                     alpha_affine=500 * 0.03),
                    GridDistortion(p=0.5),
                    # A.OpticalDistortion(distort_limit=1, shift_limit=0.5, p=1),
                ], p=0.5),
                RandomCrop(
                    height=crop_size[stage],
                    width=crop_size[stage],
                ),
            ] + common_transformations))
    val_transforms = wrapper(common_transformations)
    return train_transforms, val_transforms
def albumentations_transforms(
    crop_size,
    shorter_side,
    low_scale,
    high_scale,
    img_mean,
    img_std,
    img_scale,
    ignore_label,
    num_stages,
    dataset_type,
):
    from albumentations import (
        Normalize,
        HorizontalFlip,
        RandomCrop,
        PadIfNeeded,
        RandomScale,
        LongestMaxSize,
        SmallestMaxSize,
        OneOf,
    )
    from albumentations.pytorch import ToTensorV2 as ToTensor
    from densetorch.data import albumentations2densetorch

    if dataset_type == "densetorch":
        wrapper = albumentations2densetorch
    elif dataset_type == "torchvision":
        wrapper = albumentations2torchvision
    else:
        raise ValueError(f"Unknown dataset type: {dataset_type}")

    common_transformations = [
        Normalize(max_pixel_value=1.0 / img_scale, mean=img_mean, std=img_std),
        ToTensor(),
    ]
    train_transforms = []
    for stage in range(num_stages):
        train_transforms.append(
            wrapper([
                OneOf([
                    RandomScale(scale_limit=(low_scale[stage], high_scale[stage])),
                    LongestMaxSize(max_size=shorter_side[stage]),
                    SmallestMaxSize(max_size=shorter_side[stage]),
                ]),
                PadIfNeeded(
                    min_height=crop_size[stage],
                    min_width=crop_size[stage],
                    border_mode=cv2.BORDER_CONSTANT,
                    value=np.array(img_mean) / img_scale,
                    mask_value=ignore_label,
                ),
                HorizontalFlip(p=0.5),
                RandomCrop(height=crop_size[stage], width=crop_size[stage]),
            ] + common_transformations))
    val_transforms = wrapper(common_transformations)
    return train_transforms, val_transforms
def main():
    with open('config.yaml', 'r') as f:
        config = yaml.load(f)

    set_global_seed(SEED)
    prepare_cudnn(deterministic=True, benchmark=True)

    model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
    state = torch.load(PRETRAINED_WEIGHTS_PATH, map_location=lambda storage, loc: storage)
    state.pop('_fc.weight')
    state.pop('_fc.bias')
    res = model.load_state_dict(state, strict=False)
    assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
    model = model.cuda()

    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    _, rand_augment, _ = transforms_imagenet_train((CROP_HEIGHT, CROP_WIDTH),
                                                   auto_augment='original-mstd0.5',
                                                   separate=True)
    train_dataset = TrackPairDataset(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT),
                                     os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME),
                                     TRAIN_INDICES,
                                     track_length=TRACK_LENGTH,
                                     track_transform=TrackTransform(FPS_RANGE, SCALE_RANGE, CRF_RANGE, TUNE_VALUES),
                                     image_transform=Compose([
                                         SmallestMaxSize(CROP_WIDTH),
                                         PadIfNeeded(CROP_HEIGHT, CROP_WIDTH),
                                         HorizontalFlip(),
                                         RandomCrop(CROP_HEIGHT, CROP_WIDTH),
                                         VisionTransform(rand_augment, is_tensor=False, p=0.5),
                                         normalize,
                                         ToTensor()
                                     ]),
                                     sequence_mode=False)
    print('Train dataset size: {}.'.format(len(train_dataset)))

    warmup_optimizer = torch.optim.SGD(model._fc.parameters(), INITIAL_LR,
                                       momentum=MOMENTUM, weight_decay=WEIGHT_DECAY, nesterov=True)
    full_optimizer = torch.optim.SGD(model.parameters(), INITIAL_LR,
                                     momentum=MOMENTUM, weight_decay=WEIGHT_DECAY, nesterov=True)
    full_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        full_optimizer, lambda iteration: (MAX_ITERS - iteration) / MAX_ITERS)

    snapshots_root = os.path.join(config['ARTIFACTS_PATH'], SNAPSHOTS_ROOT, OUTPUT_FOLDER_NAME)
    os.makedirs(snapshots_root)
    log_root = os.path.join(config['ARTIFACTS_PATH'], LOGS_ROOT, OUTPUT_FOLDER_NAME)
    os.makedirs(log_root)
    writer = SummaryWriter(log_root)

    iteration = 0
    if iteration < NUM_WARMUP_ITERATIONS:
        print('Start {} warmup iterations'.format(NUM_WARMUP_ITERATIONS))
        model.eval()
        model._fc.train()
        for param in model.parameters():
            param.requires_grad = False
        for param in model._fc.parameters():
            param.requires_grad = True
        optimizer = warmup_optimizer
    else:
        print('Start without warmup iterations')
        model.train()
        optimizer = full_optimizer

    max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
    writer.add_scalar('train/max_lr', max_lr, iteration)

    epoch = 0
    fake_prob_dist = distributions.beta.Beta(0.5, 0.5)
    while True:
        epoch += 1
        print('Epoch {} is in progress'.format(epoch))
        loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                            num_workers=NUM_WORKERS, drop_last=True)
        for samples in tqdm.tqdm(loader):
            iteration += 1

            fake_input_tensor = torch.cat(samples['fake']).cuda()
            real_input_tensor = torch.cat(samples['real']).cuda()
            target_fake_prob = fake_prob_dist.sample((len(fake_input_tensor),)).float().cuda()
            fake_weight = target_fake_prob.view(-1, 1, 1, 1)

            input_tensor = (1.0 - fake_weight) * real_input_tensor + fake_weight * fake_input_tensor
            pred = model(input_tensor).flatten()
            loss = F.binary_cross_entropy_with_logits(pred, target_fake_prob)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iteration > NUM_WARMUP_ITERATIONS:
                full_lr_scheduler.step()
                max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
                writer.add_scalar('train/max_lr', max_lr, iteration)

            writer.add_scalar('train/loss', loss.item(), iteration)

            if iteration == NUM_WARMUP_ITERATIONS:
                print('Stop warmup iterations')
                model.train()
                for param in model.parameters():
                    param.requires_grad = True
                optimizer = full_optimizer

            if iteration % SNAPSHOT_FREQUENCY == 0:
                snapshot_name = SNAPSHOT_NAME_TEMPLATE.format(iteration)
                snapshot_path = os.path.join(snapshots_root, snapshot_name)
                print('Saving snapshot to {}'.format(snapshot_path))
                torch.save(model.state_dict(), snapshot_path)

            if iteration >= MAX_ITERS:
                print('Stop training due to maximum iteration exceeded')
                return
def collate_fn(x):
    return list(zip(*x))


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--path_in', type=str, required=True)
    ap.add_argument('-o', '--path_out', type=str, required=True)
    ap.add_argument('-b', '--batch_size', type=int, default=128)
    ap.add_argument('-w', '--num_workers', type=int, default=4)
    args = vars(ap.parse_args())

    size = max(get_minimal_size(args['path_in']), 160)
    transform = Compose([
        SmallestMaxSize(size),
        CenterCrop(size, size),
    ])

    dataset = ImageFolderWithPath(transform, args['path_in'])

    p = Path(args['path_out'])
    for cls in dataset.class_to_idx.keys():
        Path(p, cls).mkdir(parents=True, exist_ok=True)

    dataloader = DataLoader(dataset, collate_fn=collate_fn,
                            batch_size=args['batch_size'],
                            num_workers=args['num_workers'])

    for imgs, paths in dataloader:
        paths = [Path(p, path) for path in paths]
        for i, img in enumerate(imgs):
## Rotate
elif augmentation == 'rotate':
    transform = Rotate(always_apply=True)
    transformed_image = transform(image=image)['image']
elif augmentation == 'random_rotate90':
    transform = RandomRotate90(always_apply=True)
    transformed_image = transform(image=image)['image']
elif augmentation == 'transpose':
    transform = Transpose(always_apply=True)
    transformed_image = transform(image=image)['image']
## Size
elif augmentation == 'resize':
    transform = Resize(always_apply=True, height=100, width=100)
    transformed_image = transform(image=image)['image']
elif augmentation == 'longest_max_size':
    transform = LongestMaxSize(always_apply=True)
    transformed_image = transform(image=image)['image']
elif augmentation == 'smallest_max_size':
    transform = SmallestMaxSize(always_apply=True)
    transformed_image = transform(image=image)['image']

name, ext = image_name.split('.')
new_path = name + '_' + augmentation + '.' + ext
cv2.imwrite(new_path, transformed_image)
def generate_transforms():
    train_transform = Compose([
        SmallestMaxSize(max_size=320),
        RandomCrop(height=288, width=288)
    ])
    return train_transform
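# Usage sketch (illustrative, not part of the original source): applying the pipeline
# returned by generate_transforms() to one image; the dummy array below is made up.
import numpy as np

train_transform = generate_transforms()
img = np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8)
crop = train_transform(image=img)["image"]  # shortest side rescaled to 320, then a random 288x288 crop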
import cv2
from albumentations import (
    Compose, HorizontalFlip, Rotate, HueSaturationValue, RandomBrightness,
    RandomContrast, RandomGamma, JpegCompression, GaussNoise, Cutout,
    MedianBlur, Blur, OneOf, IAAAdditiveGaussianNoise, OpticalDistortion,
    GridDistortion, IAAPiecewiseAffine, ShiftScaleRotate, CenterCrop,
    RandomCrop, Resize, PadIfNeeded, RandomScale, SmallestMaxSize)
from albumentations.pytorch.transforms import ToTensor

cv2.setNumThreads(0)

train_transform = Compose([
    SmallestMaxSize(224),
    RandomScale(scale_limit=0.125),
    # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.),
    # ShiftScaleRotate(
    #     shift_limit=0.0625, scale_limit=0.1, rotate_limit=30,
    #     border_mode=cv2.BORDER_REFLECT_101, p=1.),
    Rotate(limit=20, border_mode=cv2.BORDER_REFLECT_101, p=1.),
    OneOf([
        RandomCrop(192, 192, p=0.9),
        CenterCrop(192, 192, p=0.1),
    ], p=1.),
    HorizontalFlip(p=0.5),
    RandomContrast(limit=0.2, p=0.5),
    RandomGamma(gamma_limit=(80, 120), p=0.5),
    RandomBrightness(limit=0.2, p=0.5),
    # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20,
    #                    val_shift_limit=10, p=1.),
    # OneOf([
    #     OpticalDistortion(p=0.3),
from albumentations import (HorizontalFlip, ShiftScaleRotate, RGBShift, CenterCrop,
                            RandomSizedCrop, SmallestMaxSize, RandomCrop,
                            HueSaturationValue, Normalize, RandomContrast,
                            RandomBrightness, Flip, OneOf, Compose)

train_transform = Compose([
    SmallestMaxSize(max_size=256),
    ShiftScaleRotate(scale_limit=(0.5, 1), rotate_limit=5),
    RandomCrop(224, 224, p=1.0),
    HorizontalFlip(0.5),
    OneOf([
        HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5, val_shift_limit=5, p=1),
        RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.5)
    ], p=0.2),
    OneOf([
        RandomBrightness(limit=0.2, p=1.),
        RandomContrast(limit=0.2, p=1.),
    ], p=0.2),
    Normalize(p=1.),
], p=1.0)

valid_transform = Compose([
    SmallestMaxSize(max_size=256),
    CenterCrop(224, 224, p=1.0),
    Normalize(p=1.),
],