def train(train_df, valid_df, model, model_name, input_size, epochs, batch_size,
          save_weights_path, save_logs_path, train_dir, valid_dir, ):
    """Train `model` with augmented batches from `train_df`, validating on `valid_df`.

    Args:
        train_df / valid_df: DataFrames with `Image` and `Labels` columns.
        model: compiled Keras model to fit.
        model_name: identifier forwarded to the DataGenerator for preprocessing.
        input_size: square side length of the network input.
        epochs, batch_size: usual training hyper-parameters.
        save_weights_path: directory receiving per-epoch `.h5` checkpoints.
        save_logs_path: directory receiving `log.csv`.
        train_dir / valid_dir: image directories for the two splits.

    Side effects: creates the output directories, writes checkpoints and a CSV log.
    """
    # exist_ok avoids the check-then-create race of the original
    # `if not os.path.exists(...)` pattern.
    os.makedirs(save_weights_path, exist_ok=True)
    os.makedirs(save_logs_path, exist_ok=True)

    # Training-time augmentation; the whole pipeline fires with p=0.5.
    AUGMENTATIONS_TRAIN = Compose([
        HorizontalFlip(p=0.25),
        RandomSizedCrop(min_max_height=(int(input_size * 0.75), input_size),
                        height=input_size, width=input_size, p=0.25),
        OneOf([
            ShiftScaleRotate(rotate_limit=25),
            ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
            GridDistortion(),
            OpticalDistortion(distort_limit=2, shift_limit=0.5),
        ], p=0.5),
        OneOf([
            RandomContrast(),
            RandomGamma(),
            RandomBrightness(),
        ], p=0.5),
        OneOf([
            Blur(),
            MedianBlur(),
            GaussNoise(),
            GaussianBlur(),
        ], p=0.5),
    ], p=0.5)

    # Generator params.  NOTE: 'n_classees' spelling is the key the project's
    # DataGenerator expects — do not "fix" it here without changing DataGenerator.
    params_train = {
        'list_IDs': list(train_df.Image),
        'labels': list(train_df.Labels),
        'dim': (input_size, input_size),
        'data_dir': train_dir,
        'batch_size': batch_size,
        'n_channels': 3,
        'n_classees': PARAMS_JSON['CLASS_NUM'],
        'aug': AUGMENTATIONS_TRAIN,
        'model_name': model_name,
        'preprocess_input': preprocess_input,
        'to_categori': to_categori,
        'shuffle': True}

    # Validation uses the same parameters but no augmentation.
    params_val = {
        'list_IDs': list(valid_df.Image),
        'labels': list(valid_df.Labels),
        'dim': (input_size, input_size),
        'data_dir': valid_dir,
        'batch_size': batch_size,
        'n_channels': 3,
        'n_classees': PARAMS_JSON['CLASS_NUM'],
        'aug': None,
        'model_name': model_name,
        'preprocess_input': preprocess_input,
        'to_categori': to_categori,
        'shuffle': True}

    # Create generators.
    train_generator = DataGenerator(**params_train)
    validation_generator = DataGenerator(**params_val)

    # Class weights to counter label imbalance.
    class_weight_dict = CalculateClassWeight(train_df, PARAMS_JSON['CLASS_NUM'], to_categori)

    # Model checkpoint — one file per epoch.
    # BUG FIX: `mode` was passed the builtin `max`/`min` functions; Keras
    # expects the strings 'max'/'min' and otherwise falls back to 'auto'
    # with a warning, which mis-tracks val_auc.
    model_path = os.path.join(save_weights_path, '{epoch:02d}.h5')
    check_point = ModelCheckpoint(filepath=model_path, monitor='val_auc',
                                  verbose=1, mode='max')

    # Halve the LR when val_loss plateaus for 5 epochs.
    reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min',
                                 patience=5)

    # CSV logger.
    logs_path = os.path.join(save_logs_path, "log.csv")
    csvlogger = CSVLogger(logs_path)
    callbacks = [csvlogger, check_point, reduceLR]

    # Train.
    model.fit_generator(generator=train_generator,
                        epochs=epochs,
                        validation_data=validation_generator,
                        callbacks=callbacks,
                        class_weight=class_weight_dict)
from albumentations import (Compose, Resize, RandomCrop, Flip, HorizontalFlip,
                            VerticalFlip, Transpose, RandomRotate90,
                            ShiftScaleRotate, OneOf, Blur, MotionBlur,
                            MedianBlur, GaussianBlur)
from albumentations.pytorch import ToTensor

# Geometric flips/rotations: at most one member fires when the group fires.
_flip_group = OneOf([
    VerticalFlip(p=0.2),
    HorizontalFlip(p=0.3),
    Transpose(p=0.2),
    RandomRotate90(p=0.2),
], p=0.3)

# Blur variants: at most one member fires when the group fires.
_blur_group = OneOf([
    Blur(p=0.2),
    MotionBlur(p=0.2),
    MedianBlur(p=0.2),
    GaussianBlur(p=0.2),
], p=0.3)

# Training pipeline: random crop + stochastic geometry/blur, then a fixed
# resize to 128x128 and tensor conversion.
train_aug = Compose([
    RandomCrop(height=96, width=96, p=0.2),
    _flip_group,
    ShiftScaleRotate(p=0.2),
    _blur_group,
    Resize(128, 128, always_apply=True),
    ToTensor(),
])

# Validation pipeline: deterministic resize + tensor conversion only.
valid_aug = Compose([
    Resize(128, 128, always_apply=True),
    ToTensor(),
])
def aug_val(resolution, p=1):
    """Validation-time augmentation: square resize followed by normalization."""
    steps = [Resize(resolution, resolution), Normalize()]
    return Compose(steps, p=p)
def compose_transforms(self):
    """Assemble the configured transform list into a single Compose pipeline."""
    pipeline = self.transforms
    self.transformer = Compose(transforms=pipeline)
from albumentations import Compose, RandomBrightnessContrast, \
    HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \
    ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur
import cv2
from albu import IsotropicResize

size = 256

# One of three isotropic-resize interpolation strategies (p=0.7 for the group).
_resize_choices = OneOf([
    IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA,
                    interpolation_up=cv2.INTER_CUBIC),
    IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA,
                    interpolation_up=cv2.INTER_LINEAR),
    IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR,
                    interpolation_up=cv2.INTER_LINEAR),
], p=0.7)

# Color jitter alternatives (p=0.7 for the group).
_color_choices = OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()],
                       p=0.7)

# Declare the augmentation pipeline.
transform = Compose([
    ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
    GaussNoise(p=0.1),
    GaussianBlur(blur_limit=3, p=0.05),
    HorizontalFlip(p=0.5),
    _resize_choices,
    PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
    _color_choices,
    ToGray(p=0.2),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10,
                     border_mode=cv2.BORDER_CONSTANT, p=0.5),
])

# Read an image with OpenCV (BGR; the RGB conversion is intentionally disabled).
image = cv2.imread("/home/ubuntu/dataset/dfdc_image/train/dfdc_train_part_0/aaqaifqrwn/frame0.jpg")
#image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Augment the image.
transformed = transform(image=image)
def __call__(self, data):
    """Apply albumentations-based augmentation to an image/label pair.

    Args:
        data: dict with 'image' and 'label' arrays (plus 'roi' bounds when
            self.use_roi is set); converted to numpy via to_numpy().

    Returns:
        The same dict with 'image' and 'label' replaced by augmented arrays.
    """
    data = to_numpy(data)
    img, label = data['image'], data['label']
    # NOTE(review): `img.shape == 4` compares a tuple with an int, so this is
    # always False and the multi-slice branch below never runs.  The intent
    # was presumably `len(img.shape) == 4` — TODO confirm before enabling.
    is_3d = img.shape == 4
    max_size = max(self._output_size[0], self._output_size[1])
    if self._type == 'train':
        # Stochastic photometric + geometric augmentation, then deterministic
        # resize/pad to the configured output size.
        task = [
            HorizontalFlip(p=0.5),
            RandomBrightnessContrast(p=0.5),
            RandomGamma(p=0.5),
            GridDistortion(border_mode=cv2.BORDER_CONSTANT, p=0.5),
            LongestMaxSize(max_size, p=1),
            PadIfNeeded(self._output_size[0], self._output_size[1],
                        cv2.BORDER_CONSTANT, value=0, p=1),
            ShiftScaleRotate(shift_limit=0.2, scale_limit=0.5, rotate_limit=30,
                             border_mode=cv2.BORDER_CONSTANT, value=0, p=0.5)
        ]
    else:
        # Evaluation: deterministic resize + pad only.
        task = [
            LongestMaxSize(max_size, p=1),
            PadIfNeeded(self._output_size[0], self._output_size[1],
                        cv2.BORDER_CONSTANT, value=0, p=1)
        ]
    if self.use_roi:
        # BUG FIX: original used `len(data['roi']) is not 0`, which compares
        # identity with a small-int literal; `!= 0` is the correct comparison.
        assert 'roi' in data.keys() and len(data['roi']) != 0
        roi = data['roi']
        min_y = 0
        max_y = img.shape[0]
        min_x = 0
        max_x = img.shape[1]
        # Expand the ROI by the configured error margin, clamped to the image.
        min_x = max(min_x, roi['min_x'] - self._roi_error_range)
        max_x = min(max_x, roi['max_x'] + self._roi_error_range)
        min_y = max(min_y, roi['min_y'] - self._roi_error_range)
        max_y = min(max_y, roi['max_y'] + self._roi_error_range)
        crop = [Crop(min_x, min_y, max_x, max_y, p=1)]
        task = crop + task
    aug = Compose_albu(task)
    if not is_3d:
        aug_data = aug(image=img, mask=label)
        data['image'], data['label'] = aug_data['image'], aug_data['mask']
    else:
        # Apply the identical (random) transform to every slice along the
        # last axis via albumentations additional targets.
        keys = {}
        targets = {}
        for i in range(1, img.shape[2]):
            keys.update({f'image{i}': 'image'})
            keys.update({f'mask{i}': 'mask'})
            targets.update({f'image{i}': img[:, :, i]})
            targets.update({f'mask{i}': label[:, :, i]})
        aug.add_targets(keys)
        targets.update({'image': img[:, :, 0]})
        targets.update({'mask': label[:, :, 0]})
        aug_data = aug(**targets)
        imgs = [aug_data['image']]
        labels = [aug_data['mask']]
        for i in range(1, img.shape[2]):
            imgs.append(aug_data[f'image{i}'])
            labels.append(aug_data[f'mask{i}'])
        img = np.stack(imgs, axis=-1)
        label = np.stack(labels, axis=-1)
        data['image'] = img
        data['label'] = label
    return data
def __init__(self):
    """Build the pipeline: normalize with dataset statistics, then tensorize."""
    normalize = Normalize(mean=[0.46009655, 0.43957878, 0.41827092],
                          std=[0.2108204, 0.20766491, 0.21656131],
                          max_pixel_value=255.0,
                          p=1.0)
    self.transformer = Compose([normalize, ToTensorV2(p=1.0)])
def collate_fn(x):
    """Transpose a batch: [(img, path), ...] -> [(img, ...), (path, ...)]."""
    return list(zip(*x))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--path_in', type=str, required=True)
    parser.add_argument('-o', '--path_out', type=str, required=True)
    parser.add_argument('-b', '--batch_size', type=int, default=128)
    parser.add_argument('-w', '--num_workers', type=int, default=4)
    args = vars(parser.parse_args())

    # Resize so the smallest side is at least 160, then center-crop square.
    size = max(get_minimal_size(args['path_in']), 160)
    transform = Compose([
        SmallestMaxSize(size),
        CenterCrop(size, size),
    ])

    dataset = ImageFolderWithPath(transform, args['path_in'])

    # Mirror the class-folder layout under the output root.
    p = Path(args['path_out'])
    for cls in dataset.class_to_idx.keys():
        Path(p, cls).mkdir(parents=True, exist_ok=True)

    dataloader = DataLoader(dataset,
                            collate_fn=collate_fn,
                            batch_size=args['batch_size'],
                            num_workers=args['num_workers'])

    # Write each transformed image next to its class folder.
    for imgs, paths in dataloader:
        out_paths = [Path(p, rel) for rel in paths]
        for img, out_path in zip(imgs, out_paths):
            img.save(out_path.as_posix())
from tensorflow.keras.preprocessing import image
import numpy as np
import pandas as pd
import os
from random import sample
from albumentations import (Compose, RandomBrightnessContrast, HueSaturationValue,
                            HorizontalFlip, ToGray, MedianBlur)

# Face-image augmentation: flip, brightness/contrast jitter, and saturation
# shift only (hue/value held fixed).  Alternatives kept commented for tuning.
transforms = Compose([
    HorizontalFlip(),
    RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
    # RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0)
    HueSaturationValue(hue_shift_limit=0, sat_shift_limit=30, val_shift_limit=0),
    # ToGray()
    # MedianBlur(blur_limit=5),
])

# TODO: make these arguments prettier
# Per-backbone input sizes (height, width) — presumably FaceNet, VGG-Face,
# ArcFace, DeepFace, DeepID, OpenFace respectively; verify against callers.
IMG_SIZE_FN = (160, 160)
IMG_SIZE_VGG = (224, 224)
IMG_SIZE_ARCF = (112, 112)
IMG_SIZE_DF = (152, 152)
IMG_SIZE_DID = (55, 47)
IMG_SIZE_OF = (96, 96)

# (definition continues beyond this chunk)
def prewhiten(x, train=True):
def __init__(self,
             imgs: Sequence[str] = None,
             suffix: str = '.path',
             line_width: int = 4,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             mode: str = 'path',
             augmentation: bool = False,
             valid_baselines: Sequence[str] = None,
             merge_baselines: Dict[str, Sequence[str]] = None,
             valid_regions: Sequence[str] = None,
             merge_regions: Dict[str, Sequence[str]] = None):
    """
    Reads a list of image-json pairs and creates a data set.

    Args:
        imgs (list):
        suffix (int): Suffix to attach to image base name to load JSON files
                      from.
        line_width (int): Height of the baseline in the scaled input.
        target_size (tuple): Target size of the image as a (height, width) tuple.
        mode (str): Either path, alto, page, xml, or None. In alto, page, and xml
                    mode the baseline paths and image data is retrieved from an
                    ALTO/PageXML file. In `None` mode data is iteratively added
                    through the `add` method.
        augmentation (bool): Enable/disable augmentation.
        valid_baselines (list): Sequence of valid baseline identifiers. If `None`
                                all are valid.
        merge_baselines (dict): Sequence of baseline identifiers to merge. Note
                                that merging occurs after entities not in valid_*
                                have been discarded.
        valid_regions (list): Sequence of valid region identifiers. If `None`
                              all are valid.
        merge_regions (dict): Sequence of region identifiers to merge. Note that
                              merging occurs after entities not in valid_* have
                              been discarded.
    """
    super().__init__()
    self.mode = mode
    self.im_mode = '1'
    self.aug = None
    self.targets = []
    # n-th entry contains semantic of n-th class
    self.class_mapping = {
        'aux': {
            '_start_separator': 0,
            '_end_separator': 1
        },
        'baselines': {},
        'regions': {}
    }
    # Per-type occurrence counts, filled while parsing.
    self.class_stats = {
        'baselines': defaultdict(int),
        'regions': defaultdict(int)
    }
    self.num_classes = 2  # the two aux separator classes above
    self.mbl_dict = merge_baselines if merge_baselines is not None else {}
    self.mreg_dict = merge_regions if merge_regions is not None else {}
    self.valid_baselines = valid_baselines
    self.valid_regions = valid_regions
    if mode in ['alto', 'page', 'xml']:
        # Pick the parser matching the markup dialect.
        if mode == 'alto':
            fn = parse_alto
        elif mode == 'page':
            fn = parse_page
        elif mode == 'xml':
            fn = parse_xml
        im_paths = []
        self.targets = []
        for img in imgs:
            try:
                data = fn(img)
                im_paths.append(data['image'])
                # Group baselines by (merged) script type, filtering invalid ones.
                lines = defaultdict(list)
                for line in data['lines']:
                    if valid_baselines is None or line['script'] in valid_baselines:
                        lines[self.mbl_dict.get(line['script'],
                                                line['script'])].append(line['baseline'])
                        self.class_stats['baselines'][self.mbl_dict.get(
                            line['script'], line['script'])] += 1
                # Same treatment for regions.
                regions = defaultdict(list)
                for k, v in data['regions'].items():
                    if valid_regions is None or k in valid_regions:
                        regions[self.mreg_dict.get(k, k)].extend(v)
                        self.class_stats['regions'][self.mreg_dict.get(k, k)] += len(v)
                data['regions'] = regions
                self.targets.append({
                    'baselines': lines,
                    'regions': data['regions']
                })
            except KrakenInputException as e:
                # Skip unparsable files but keep going.
                logger.warning(e)
                continue
        # get line types
        imgs = im_paths
        # calculate class mapping
        line_types = set()
        region_types = set()
        for page in self.targets:
            for line_type in page['baselines'].keys():
                line_types.add(line_type)
            for reg_type in page['regions'].keys():
                region_types.add(reg_type)
        # Assign consecutive class indices after the aux classes;
        # idx = -1 keeps num_classes unchanged when the set is empty.
        idx = -1
        for idx, line_type in enumerate(line_types):
            self.class_mapping['baselines'][line_type] = idx + self.num_classes
        self.num_classes += idx + 1
        idx = -1
        for idx, reg_type in enumerate(region_types):
            self.class_mapping['regions'][reg_type] = idx + self.num_classes
        self.num_classes += idx + 1
    elif mode == 'path':
        pass
    elif mode is None:
        imgs = []
    else:
        raise Exception('invalid dataset mode')
    if augmentation:
        # Imported lazily so albumentations is only required when enabled.
        from albumentations import (
            Compose,
            ToFloat,
            FromFloat,
            RandomRotate90,
            Flip,
            OneOf,
            MotionBlur,
            MedianBlur,
            Blur,
            ShiftScaleRotate,
            OpticalDistortion,
            ElasticTransform,
            RandomBrightnessContrast,
            HueSaturationValue,
        )

        self.aug = Compose([
            ToFloat(),
            RandomRotate90(),
            Flip(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                             rotate_limit=45, p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ], p=0.2),
            HueSaturationValue(hue_shift_limit=20, sat_shift_limit=0.1,
                               val_shift_limit=0.1, p=0.3),
        ], p=0.5)
    self.imgs = imgs
    self.line_width = line_width
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.seg_type = None
def get_aug(aug, min_area=0., min_visibility=0.):
    """Wrap a list of augmentations in a Compose pipeline.

    `min_area` and `min_visibility` are accepted for signature compatibility
    but are not forwarded (no bbox params are attached here).
    """
    pipeline = Compose(aug)
    return pipeline
def __init__(self,
             split: Callable[[str], str] = lambda x: path.splitext(x)[0],
             suffix: str = '.gt.txt',
             normalization: Optional[str] = None,
             whitespace_normalization: bool = True,
             reorder: bool = True,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             preload: bool = True,
             augmentation: bool = False) -> None:
    """
    Reads a list of image-text pairs and creates a ground truth set.

    Args:
        split (func): Function for generating the base name without
                      extensions from paths
        suffix (str): Suffix to attach to image base name for text
                      retrieval
        mode (str): Image color space. Either RGB (color) or L
                    (grayscale/bw). Only L is compatible with vertical
                    scaling/dewarping.
        scale (int, tuple): Target height or (width, height) of dewarped
                            line images. Vertical-only scaling is through
                            CenterLineNormalizer, resizing with Lanczos
                            interpolation. Set to 0 to disable.
        normalization (str): Unicode normalization for gt
        whitespace_normalization (str): Normalizes unicode whitespace and
                                        strips whitespace.
        reorder (bool): Whether to rearrange code points in "display"/LTR
                        order
        im_transforms (func): Function taking an PIL.Image and returning a
                              tensor suitable for forward passes.
        preload (bool): Enables preloading and preprocessing of image files.
    """
    self.suffix = suffix
    self.split = lambda x: split(x) + self.suffix
    self._images = []  # type: Union[List[Image], List[torch.Tensor]]
    self._gt = []  # type: List[str]
    self.alphabet = Counter()  # type: Counter
    self.text_transforms = []  # type: List[Callable[[str], str]]
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.aug = None
    self.preload = preload
    self.seg_type = 'bbox'
    # built text transformations
    if normalization:
        self.text_transforms.append(
            lambda x: unicodedata.normalize(cast(str, normalization), x))
    if whitespace_normalization:
        # BUG FIX: the pattern was written as '\s' (non-raw); that is an
        # invalid escape sequence that only works because Python passes
        # unknown escapes through, and it raises a SyntaxWarning on modern
        # interpreters.  Use a raw string — the value is identical.
        self.text_transforms.append(
            lambda x: regex.sub(r'\s', ' ', x).strip())
    if reorder:
        self.text_transforms.append(bd.get_display)
    if augmentation:
        # Imported lazily so albumentations is only needed when enabled.
        from albumentations import (
            Compose,
            ToFloat,
            FromFloat,
            Flip,
            OneOf,
            MotionBlur,
            MedianBlur,
            Blur,
            ShiftScaleRotate,
            OpticalDistortion,
            ElasticTransform,
            RandomBrightnessContrast,
        )

        self.aug = Compose([
            ToFloat(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                             rotate_limit=45, p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ], p=0.2),
        ], p=0.5)
    self.im_mode = '1'
def __init__(self,
             normalization: Optional[str] = None,
             whitespace_normalization: bool = True,
             reorder: bool = True,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             preload: bool = True,
             augmentation: bool = False) -> None:
    """Create a ground-truth set with configurable text normalization.

    Args:
        normalization: Unicode normalization form applied to ground truth.
        whitespace_normalization: Collapse unicode whitespace and strip.
        reorder: Rearrange code points in "display"/LTR order.
        im_transforms: Callable mapping a PIL.Image to a forward-pass tensor.
        preload: Enables preloading and preprocessing of image files.
        augmentation: Enable/disable albumentations augmentation.
    """
    self._images = []  # type: Union[List[Image], List[torch.Tensor]]
    self._gt = []  # type: List[str]
    self.alphabet = Counter()  # type: Counter
    self.text_transforms = []  # type: List[Callable[[str], str]]
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.transforms = im_transforms
    self.preload = preload
    self.aug = None
    self.seg_type = 'baselines'
    # built text transformations
    if normalization:
        self.text_transforms.append(
            lambda x: unicodedata.normalize(cast(str, normalization), x))
    if whitespace_normalization:
        # BUG FIX: '\s' (non-raw) is an invalid escape sequence and raises a
        # SyntaxWarning on modern interpreters; a raw string has the same value.
        self.text_transforms.append(
            lambda x: regex.sub(r'\s', ' ', x).strip())
    if reorder:
        self.text_transforms.append(bd.get_display)
    if augmentation:
        # Imported lazily so albumentations is only needed when enabled.
        from albumentations import (
            Compose,
            ToFloat,
            FromFloat,
            Flip,
            OneOf,
            MotionBlur,
            MedianBlur,
            Blur,
            ShiftScaleRotate,
            OpticalDistortion,
            ElasticTransform,
            RandomBrightnessContrast,
        )

        self.aug = Compose([
            ToFloat(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            # Baseline data tolerates only slight rotation (3 degrees).
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                             rotate_limit=3, p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ], p=0.2),
        ], p=0.5)
    self.im_mode = '1'
def __init__(self, flag):
    """Build train/test pipelines: (flip for train,) scale to [-1, 1], tensorize."""
    half = (0.5, 0.5, 0.5)
    self.traintransform = Compose([HorizontalFlip(), Normalize(half, half), ToTensor()])
    self.testtransform = Compose([Normalize(half, half), ToTensor()])
    self.flag = flag
from albumentations.pytorch import ToTensorV2

# Single-class U-Net with a ResNet-18 encoder; weights loaded from a local
# checkpoint (no pretrained encoder weights).
model = segmentation_models_pytorch.Unet("resnet18", encoder_weights=None, classes=1, activation=None)
model.load_state_dict(torch.load("./models/model_10.pth"))

dataset_path = "/home/shouki/Desktop/Programming/Python/AI/Datasets/ImageData/CarvanaImageMaskingDataset"
image_size = (128, 128)
image_path = os.path.join(dataset_path, "test")
num_images = 3

# ImageNet-style normalization, then CHW tensor conversion.
transform = Compose([
    Resize(*image_size, p=1.0),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], p=1.0),
    ToTensorV2()
])

# One row per sampled test image: left column raw image, right column mask.
figure, axes = plt.subplots(num_images, 2, figsize=(5, 7.5))
for index, ((ax1, ax2), image_name) in enumerate(
        zip(axes, np.random.choice(os.listdir(image_path), num_images))):
    image = np.asarray(
        Image.open(os.path.join(image_path, image_name)).resize(image_size).convert("RGB"))
    # Sigmoid over the single-logit output gives a per-pixel probability map.
    mask = torch.sigmoid(model(transform(
        image=image)["image"].unsqueeze(0))).squeeze().detach().numpy()
    ax1.imshow(image)
    ax1.set_xticks([])
    ax1.set_yticks([])
    # NOTE(review): ax2 and `mask` are not used within this chunk — the loop
    # body presumably continues beyond the visible source; confirm upstream.
def build_test(self):
    """Test-time pipeline: fixed 64x32 center crop followed by tensor conversion."""
    steps = [CenterCrop(64, 32), ToTensor()]
    return Compose(steps)
def run(*options, cfg=None, debug=False):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the cfg file
        Options loaded from default.py will be overridden by those loaded from cfg file
        Options passed in via options argument will override those loaded from cfg file

    Args:
        *options (str, int, optional): Options used to overide what is loaded from the
                                       config. To see what options are available consult
                                       default.py
        cfg (str, optional): Location of config file to load. Defaults to None.
        debug (bool): Places scripts in debug/test mode and only executes a few iterations
    """
    update_config(config, options=options, config_file=cfg)
    # we will write the model under outputs / config_file_name / model_dir
    config_file_name = "default_config" if not cfg else cfg.split("/")[-1].split(".")[0]

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    logger.debug(config.WORKERS)
    epochs_per_cycle = config.TRAIN.END_EPOCH // config.TRAIN.SNAPSHOTS
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    # Seed everything for reproducibility.
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"

    # Setup Augmentations
    # NOTE(review): `mask_value` is not defined in this function — presumably a
    # module-level constant (ignore label); confirm in the enclosing module.
    basic_aug = Compose(
        [
            Normalize(mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=config.TRAIN.MAX,),
            PadIfNeeded(
                min_height=config.TRAIN.PATCH_SIZE,
                min_width=config.TRAIN.PATCH_SIZE,
                border_mode=config.OPENCV_BORDER_CONSTANT,
                always_apply=True,
                mask_value=mask_value,
                value=0,
            ),
            Resize(
                config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT, config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH, always_apply=True,
            ),
            PadIfNeeded(
                min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,
                min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,
                border_mode=config.OPENCV_BORDER_CONSTANT,
                always_apply=True,
                mask_value=mask_value,
                value=0,
            ),
        ]
    )
    if config.TRAIN.AUGMENTATION:
        # Only horizontal flip is added on top of the deterministic pipeline.
        train_aug = Compose([basic_aug, HorizontalFlip(p=0.5)])
        val_aug = basic_aug
    else:
        train_aug = val_aug = basic_aug

    PenobscotDataset = get_patch_dataset(config)

    train_set = PenobscotDataset(
        config.DATASET.ROOT,
        config.TRAIN.PATCH_SIZE,
        config.TRAIN.STRIDE,
        split="train",
        transforms=train_aug,
        n_channels=config.MODEL.IN_CHANNELS,
        complete_patches_only=config.TRAIN.COMPLETE_PATCHES_ONLY,
    )

    val_set = PenobscotDataset(
        config.DATASET.ROOT,
        config.TRAIN.PATCH_SIZE,
        config.TRAIN.STRIDE,
        split="val",
        transforms=val_aug,
        n_channels=config.MODEL.IN_CHANNELS,
        complete_patches_only=config.VALIDATION.COMPLETE_PATCHES_ONLY,
    )
    logger.info(train_set)
    logger.info(val_set)
    n_classes = train_set.n_classes

    train_loader = data.DataLoader(
        train_set, batch_size=config.TRAIN.BATCH_SIZE_PER_GPU, num_workers=config.WORKERS, shuffle=True,
    )

    if debug:
        # Shrink validation to 3 samples in debug mode.
        val_set = data.Subset(val_set, range(3))

    val_loader = data.DataLoader(val_set, batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU, num_workers=config.WORKERS)

    model = getattr(models, config.MODEL.NAME).get_seg_model(config)
    model = model.to(device)  # Send to GPU

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config.TRAIN.MAX_LR,
        momentum=config.TRAIN.MOMENTUM,
        weight_decay=config.TRAIN.WEIGHT_DECAY,
    )

    # Prefer a git-stamped output path; fall back when git info is unavailable.
    try:
        output_dir = generate_path(
            config.OUTPUT_DIR, git_branch(), git_hash(), config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),
        )
    except TypeError:
        output_dir = generate_path(config.OUTPUT_DIR, config_file_name, config.TRAIN.MODEL_DIR, current_datetime(),)

    summary_writer = create_summary_writer(log_dir=path.join(output_dir, config.LOG_DIR))
    # One cosine-annealing cycle per snapshot (2 iterations in debug mode).
    snapshot_duration = epochs_per_cycle * len(train_loader) if not debug else 2 * len(train_loader)
    scheduler = CosineAnnealingScheduler(
        optimizer, "lr", config.TRAIN.MAX_LR, config.TRAIN.MIN_LR, cycle_size=snapshot_duration
    )

    # weights are inversely proportional to the frequency of the classes in
    # the training set
    class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False)

    criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=mask_value, reduction="mean")

    trainer = create_supervised_trainer(model, optimizer, criterion, _prepare_batch, device=device)

    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

    trainer.add_event_handler(
        Events.ITERATION_COMPLETED, logging_handlers.log_training_output(log_interval=config.TRAIN.BATCH_SIZE_PER_GPU),
    )
    trainer.add_event_handler(Events.EPOCH_STARTED, logging_handlers.log_lr(optimizer))
    trainer.add_event_handler(
        Events.EPOCH_STARTED, tensorboard_handlers.log_lr(summary_writer, optimizer, "epoch"),
    )
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED, tensorboard_handlers.log_training_output(summary_writer),
    )

    def _select_pred_and_mask(model_out_dict):
        # Strip singleton dims so metric shapes line up.
        return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze())

    evaluator = create_supervised_evaluator(
        model,
        _prepare_batch,
        metrics={
            "pixacc": pixelwise_accuracy(n_classes, output_transform=_select_pred_and_mask),
            "nll": Loss(criterion, output_transform=_select_pred_and_mask),
            "cacc": class_accuracy(n_classes, output_transform=_select_pred_and_mask),
            "mca": mean_class_accuracy(n_classes, output_transform=_select_pred_and_mask),
            "ciou": class_iou(n_classes, output_transform=_select_pred_and_mask),
            "mIoU": mean_iou(n_classes, output_transform=_select_pred_and_mask),
        },
        device=device,
    )

    # Set the validation run to start on the epoch completion of the training run
    trainer.add_event_handler(Events.EPOCH_COMPLETED, Evaluator(evaluator, val_loader))

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        logging_handlers.log_metrics(
            "Validation results",
            metrics_dict={
                "nll": "Avg loss :",
                "pixacc": "Pixelwise Accuracy :",
                "mca": "Avg Class Accuracy :",
                "mIoU": "Avg Class IoU :",
            },
        ),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        tensorboard_handlers.log_metrics(
            summary_writer,
            trainer,
            "epoch",
            metrics_dict={
                "mIoU": "Validation/mIoU",
                "nll": "Validation/Loss",
                "mca": "Validation/MCA",
                "pixacc": "Validation/Pixel_Acc",
            },
        ),
    )

    def _select_max(pred_tensor):
        # Argmax over the class dimension.
        return pred_tensor.max(1)[1]

    def _tensor_to_numpy(pred_tensor):
        return pred_tensor.squeeze().cpu().numpy()

    transform_func = compose(np_to_tb, decode_segmap, _tensor_to_numpy,)

    transform_pred = compose(transform_func, _select_max)

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED, create_image_writer(summary_writer, "Validation/Image", "image"),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer, "Validation/Mask", "mask", transform_func=transform_func),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer, "Validation/Pred", "y_pred", transform_func=transform_pred),
    )

    def snapshot_function():
        # Snapshot exactly at cycle boundaries.
        return (trainer.state.iteration % snapshot_duration) == 0

    checkpoint_handler = SnapshotHandler(output_dir, config.MODEL.NAME, extract_metric_from("mIoU"), snapshot_function,)
    evaluator.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {"model": model})

    logger.info("Starting training")
    if debug:
        trainer.run(
            train_loader,
            max_epochs=config.TRAIN.END_EPOCH,
            epoch_length=config.TRAIN.BATCH_SIZE_PER_GPU,
            seed=config.SEED,
        )
    else:
        trainer.run(train_loader, max_epochs=config.TRAIN.END_EPOCH, epoch_length=len(train_loader), seed=config.SEED)
def test(*options, cfg=None, debug=False):
    """Evaluate a trained segmentation model on the held-out test split(s).

    Loads the checkpoint named by config.TEST.MODEL_PATH, runs section-wise
    evaluation, logs final metrics, and writes a confusion matrix CSV.

    Args:
        *options: Config overrides (same convention as `run`).
        cfg (str, optional): Location of config file to load. Defaults to None.
        debug (bool): Forwarded to the split evaluator.
    """
    update_config(config, options=options, config_file=cfg)
    n_classes = config.DATASET.NUM_CLASSES

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log_dir, model_name = os.path.split(config.TEST.MODEL_PATH)

    # load model:
    model = getattr(models, config.MODEL.NAME).get_seg_model(config)
    # strict=False tolerates checkpoints with extra/missing keys.
    model.load_state_dict(torch.load(config.TEST.MODEL_PATH), strict=False)
    model = model.to(device)  # Send to GPU if available

    running_metrics_overall = runningScore(n_classes)

    # Augmentation applied to whole sections (normalization only).
    section_aug = Compose([
        Normalize(
            mean=(config.TRAIN.MEAN, ),
            std=(config.TRAIN.STD, ),
            max_pixel_value=1,
        )
    ])

    # Per-patch resize + pad to the training geometry (255 = ignore label in mask).
    patch_aug = Compose([
        Resize(
            config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT,
            config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH,
            always_apply=True,
        ),
        PadIfNeeded(
            min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,
            min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,
            border_mode=cv2.BORDER_CONSTANT,
            always_apply=True,
            mask_value=255,
        ),
    ])

    pre_processing = _compose_processing_pipeline(config.TRAIN.DEPTH, aug=patch_aug)
    output_processing = _output_processing_pipeline(config)

    # "Both" evaluates the two standard test splits in sequence.
    splits = ["test1", "test2"
              ] if "Both" in config.TEST.SPLIT else [config.TEST.SPLIT]
    for sdx, split in enumerate(splits):
        labels = np.load(
            path.join(config.DATASET.ROOT, "test_once", split + "_labels.npy"))
        section_file = path.join(config.DATASET.ROOT, "splits",
                                 "section_" + split + ".txt")
        _write_section_file(labels, section_file)
        _evaluate_split(
            split,
            section_aug,
            model,
            pre_processing,
            output_processing,
            device,
            running_metrics_overall,
            config,
            debug=debug,
        )

    # FINAL TEST RESULTS:
    score, class_iou = running_metrics_overall.get_scores()

    logger.info("--------------- FINAL RESULTS -----------------")
    logger.info(f'Pixel Acc: {score["Pixel Acc: "]:.3f}')
    for cdx, class_name in enumerate(_CLASS_NAMES):
        logger.info(
            f' {class_name}_accuracy {score["Class Accuracy: "][cdx]:.3f}')
    logger.info(f'Mean Class Acc: {score["Mean Class Acc: "]:.3f}')
    logger.info(f'Freq Weighted IoU: {score["Freq Weighted IoU: "]:.3f}')
    logger.info(f'Mean IoU: {score["Mean IoU: "]:0.3f}')

    # Save confusion matrix:
    confusion = score["confusion_matrix"]
    np.savetxt(path.join(log_dir, "confusion.csv"), confusion, delimiter=" ")
from albumentations import (Compose, Resize, RandomCrop, Flip, HorizontalFlip,
                            VerticalFlip, Transpose, RandomRotate90,
                            ShiftScaleRotate, OneOf, Blur, MotionBlur,
                            MedianBlur, GaussianBlur, RandomBrightness,
                            RandomBrightnessContrast, Normalize)
from albumentations.pytorch import ToTensor


def _build_pipeline():
    """Deterministic pipeline: resize to 128x128, normalize, convert to tensor."""
    return Compose([
        Resize(128, 128, always_apply=True),
        Normalize(mean=0.06922848809290576, std=0.20515700083327537),
        ToTensor(),
    ])


# Train and validation intentionally share the identical deterministic pipeline.
train_aug = _build_pipeline()
valid_aug = _build_pipeline()
def policy_transform(split, policies=None, size=512, per_image_norm=False, mean_std=None, **kwargs):
    """Build a per-image transform closure for `split` using optional AutoAugment-style policies."""
    # Fixed normalization constants for 4-channel input.
    means = np.array([127.5, 127.5, 127.5, 127.5])
    stds = np.array([255.0, 255.0, 255.0, 255.0])

    # Base (train-only) geometric augmentation; the other ops stay disabled.
    base_aug = Compose([
        # RandomRotate90(),
        # Flip(),
        Transpose(),
    ])

    if policies is None:
        policies = []

    if isinstance(policies, str):
        with open(policies, 'r') as fid:
            # NOTE(review): eval() executes arbitrary code from the policy
            # file — only load trusted files.
            policies = eval(fid.read())
        policies = itertools.chain.from_iterable(policies)

    # Each policy is a pair of (op_name, params); ops are resolved by name
    # from this module's globals.
    aug_list = []
    for policy in policies:
        op_1, params_1 = policy[0]
        op_2, params_2 = policy[1]
        aug = Compose([
            globals().get(op_1)(**params_1),
            globals().get(op_2)(**params_2),
        ])
        aug_list.append(aug)

    resize = Resize(height=size, width=size, always_apply=True)

    def transform(image):
        if split == 'train':
            image = base_aug(image=image)['image']
            # Policy sampling is currently disabled:
            # if len(aug_list) > 0:
            #     aug = random.choice(aug_list)
            #     image = aug(image=image)['image']
            image = resize(image=image)['image']
        else:
            #if size != image.shape[0]:
            image = resize(image=image)['image']

        image = image.astype(np.float32)
        if per_image_norm:
            # NOTE(review): reshape(-1, 3) assumes 3 channels while `means`
            # and `stds` above have 4 entries — confirm the expected channel
            # count before relying on per-image normalization.
            mean = np.mean(image.reshape(-1, 3), axis=0)
            std = np.std(image.reshape(-1, 3), axis=0)
            image -= mean
            image /= (std + 0.0000001)
        else:
            image -= means
            image /= stds
        # HWC -> CHW for the network.
        image = np.transpose(image, (2, 0, 1))

        return image

    return transform
def __init__(self):
    """Normalization + tensor conversion pipeline with pascal_voc bbox handling."""
    bbox_cfg = {'format': 'pascal_voc',
                'min_area': 0,
                'min_visibility': 0,
                'label_fields': ['labels']}
    steps = [
        Normalize(mean=[0.46009655, 0.43957878, 0.41827092],
                  std=[0.2108204, 0.20766491, 0.21656131],
                  max_pixel_value=255.0,
                  p=1.0),
        # Flip(p=0.5),
        ToTensorV2(p=1.0),
    ]
    self.transformer = Compose(steps, bbox_params=bbox_cfg)
def val_transform(p=1):
    """Validation transform: per-channel normalization with mean/std 0.5."""
    normalize = Normalize(p=1, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    return Compose([normalize], p=p)
    return X, y  # NOTE(review): tail of a function whose start lies outside this chunk


###################
# Model parameters
SSD_path = "c:/Users/Szedlák Barnabás/Anaconda/Thesis/"
x_size = 96
y_size = 96

# Augmentation settings
train_augmentation = Compose(
    [
        HorizontalFlip(p=0.5),  # p = probability of the given augmentation firing
        Transpose(p=0.5),
        ShiftScaleRotate(p=0.25, rotate_limit=0),
        RandomBrightnessContrast(p=0.5)
    ],
    p=1)  # final p is the probability of applying the whole pipeline
#VerticalFlip(p=0.5),
#RandomRotate90(p=0.5),

# reduces learning rate on slowing on plateau callback
learning_rate_reducer = ReduceLROnPlateau(factor=0.1,
                                          cooldown=2,
                                          patience=5,
                                          verbose=1,
                                          min_lr=0.1e-5)
# monitor =
# model autosave callbacks
def get_aug(aug, min_area=0., min_visibility=0.):
    """Compose augmentations with COCO-format bounding-box handling.

    Boxes smaller than `min_area` or less visible than `min_visibility`
    after transformation are dropped; labels travel in 'category_id'.
    """
    bbox_params = BboxParams(format='coco',
                             min_area=min_area,
                             min_visibility=min_visibility,
                             label_fields=['category_id'])
    return Compose(aug, bbox_params=bbox_params)
def create_val_transforms(size=224):
    """Validation pipeline: isotropic resize to `size`, then zero-pad to square."""
    resize = IsotropicResize(max_side=size,
                             interpolation_down=cv2.INTER_AREA,
                             interpolation_up=cv2.INTER_CUBIC)
    pad = PadIfNeeded(min_height=size, min_width=size,
                      border_mode=cv2.BORDER_CONSTANT)
    return Compose([resize, pad])
def __getitem__(self, idx):
    """Return augmented crops + label (train) or a normalized image + label (val).

    Train mode: the 256x1600 image is split into `num_crop` horizontal crops,
    each independently augmented; returns (crops as NCHW array, label tensor).
    Val mode: returns the normalized full image tensor and label tensor.
    """
    file = self.files[idx]
    # BUG FIX: original condition was `self.mode == 'train' or 'val'`, which
    # is always truthy because the bare string 'val' is non-empty — so the
    # 'test' branch was unreachable and test files were read from the train
    # directory.
    if self.mode in ('train', 'val'):
        file_name = 'train'
    else:
        file_name = 'test'
    file_path = os.path.join(os.path.join(PATH, file_name + '_images'), file)
    image = cv2.imread(file_path)
    mask = rleToMask(self.masks[idx])
    # ImageNet normalization statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = Compose([
        OneOf([
            ShiftScaleRotate(),
            VerticalFlip(p=0.8),
            HorizontalFlip(p=0.8),
        ], p=0.6),
        OneOf([
            RandomBrightnessContrast(),
            MotionBlur(p=0.5),
            MedianBlur(blur_limit=3, p=0.5),
            Blur(blur_limit=3, p=0.5),
        ], p=0.6),
        OneOf([
            IAAAdditiveGaussianNoise(),
            GaussNoise(),
        ], p=0.6),
        Normalize(mean=mean, std=std, p=1)
        # ToTensor()
    ])
    val_aug = Compose([Normalize(mean=mean, std=std, p=1), ToTensor()])
    if self.mode == 'train':
        crop_images, crop_masks, new_crop_images = [], [], []
        # Split the wide image into num_crop equal horizontal strips.
        for i in range(num_crop):
            crop_images.append(image[:, i * (1600 // num_crop):(i + 1) * (1600 // num_crop), :])
        # Augment each crop independently.
        for num in range(len(crop_images)):
            augmented = train_aug(image=crop_images[num])
            new_crop_images.append(augmented['image'])
        # NHWC -> NCHW for the network.
        new_crop_images = np.transpose(np.array(new_crop_images), (0, 3, 1, 2))
        label = np.array(self.labels[idx])
        label = torch.tensor(label, dtype=torch.float32)
        return (new_crop_images, label)
    elif self.mode == 'val':
        augmented = val_aug(image=image)
        image = augmented['image']
        label = np.array(self.labels[idx])
        label = torch.tensor(label, dtype=torch.float32)
        return (image, label)
def train(model, cfg, model_cfg, start_epoch=0):
    """Train an interactive-segmentation model on the SBD dataset.

    Builds the loss configuration, train/val augmentation pipelines and
    SBD datasets, wires them into an ISTrainer, then runs one
    training + validation pass per epoch from ``start_epoch`` to 120.

    Args:
        model: the network handed to ISTrainer.
        cfg: runtime config; reads ``batch_size`` and ``SBD_PATH`` and
            mutates ``val_batch_size`` / ``input_normalization`` in place.
        model_cfg: model config; supplies ``crop_size``,
            ``input_normalization``, ``input_transform`` and
            ``num_max_points``.
        start_epoch: epoch index to resume from (default 0).
    """
    # Fall back to batch size 32 when the configured value is non-positive.
    cfg.batch_size = 32 if cfg.batch_size < 1 else cfg.batch_size
    cfg.val_batch_size = cfg.batch_size
    cfg.input_normalization = model_cfg.input_normalization
    crop_size = model_cfg.crop_size

    # Main instance loss plus an auxiliary loss down-weighted to 0.4.
    loss_cfg = edict()
    loss_cfg.instance_loss = SigmoidBinaryCrossEntropyLoss()
    loss_cfg.instance_loss_weight = 1.0
    loss_cfg.instance_aux_loss = SigmoidBinaryCrossEntropyLoss()
    loss_cfg.instance_aux_loss_weight = 0.4

    num_epochs = 120
    num_masks = 1

    # Training-time geometric + photometric augmentations; pad-then-crop
    # guarantees every sample is exactly crop_size.
    train_augmentator = Compose([
        Flip(),
        RandomRotate90(),
        ShiftScaleRotate(shift_limit=0.03, scale_limit=0,
                         rotate_limit=(-3, 3), border_mode=0, p=0.75),
        PadIfNeeded(
            min_height=crop_size[0],
            min_width=crop_size[1],
            border_mode=0),
        RandomCrop(*crop_size),
        RandomBrightnessContrast(brightness_limit=(-0.25, 0.25),
                                 contrast_limit=(-0.15, 0.4), p=0.75),
        RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.75)
    ], p=1.0)

    # Validation only pads and crops — no photometric changes.
    val_augmentator = Compose([
        PadIfNeeded(
            min_height=crop_size[0],
            min_width=crop_size[1],
            border_mode=0),
        RandomCrop(*crop_size)
    ], p=1.0)

    def scale_func(image_shape):
        # Random isotropic rescale factor; ignores the image shape.
        return random.uniform(0.75, 1.25)

    points_sampler = MultiPointSampler(model_cfg.num_max_points, prob_gamma=0.7,
                                       merge_objects_prob=0.15,
                                       max_num_merged_objects=2)

    trainset = SBDDataset(
        cfg.SBD_PATH,
        split='train',
        num_masks=num_masks,
        augmentator=train_augmentator,
        points_from_one_object=False,
        input_transform=model_cfg.input_transform,
        min_object_area=80,
        keep_background_prob=0.0,
        image_rescale=scale_func,
        points_sampler=points_sampler,
        # Per-sample weights; presumably produced by an earlier analysis
        # step — verify the pickle exists before training.
        samples_scores_path='./models/sbd/sbd_samples_weights.pkl',
        samples_scores_gamma=1.25)

    valset = SBDDataset(cfg.SBD_PATH, split='val', augmentator=val_augmentator,
                        num_masks=num_masks, points_from_one_object=False,
                        input_transform=model_cfg.input_transform,
                        min_object_area=80, image_rescale=scale_func,
                        points_sampler=points_sampler)

    optimizer_params = {'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-8}

    # Single LR drop (x0.1) at epoch 100.
    lr_scheduler = partial(torch.optim.lr_scheduler.MultiStepLR, milestones=[100],
                           gamma=0.1)

    trainer = ISTrainer(model, cfg, model_cfg, loss_cfg,
                        trainset, valset,
                        optimizer='adam',
                        optimizer_params=optimizer_params,
                        lr_scheduler=lr_scheduler,
                        checkpoint_interval=5,
                        image_dump_interval=100,
                        metrics=[AdaptiveIoU()],
                        max_interactive_points=model_cfg.num_max_points)

    logger.info(f'Starting Epoch: {start_epoch}')
    logger.info(f'Total Epochs: {num_epochs}')
    for epoch in range(start_epoch, num_epochs):
        trainer.training(epoch)
        trainer.validation(epoch)
def _composeAugmentations(augmentations):
    """Wrap an augmentation list in a Compose with pascal_voc bbox handling.

    A falsy ``augmentations`` value yields an empty (pass-through) pipeline;
    boxes that drop below 80% visibility after augmentation are removed.
    """
    params = BboxParams(
        format='pascal_voc',
        label_fields=['labels'],
        min_visibility=.8,
    )
    steps = augmentations if augmentations else []
    return Compose(steps, bbox_params=params)
# Also prepare the test data test[['ID', 'Image', 'Diagnosis']] = test['ID'].str.split('_', expand=True) test['Image'] = 'ID_' + test['Image'] test = test[['Image', 'Label']] test.drop_duplicates(inplace=True) test.to_csv('test.csv', index=False) # Data loaders transform_train = Compose([ CenterCrop(200, 200), #Resize(224, 224), HorizontalFlip(), RandomBrightnessContrast(), ShiftScaleRotate(), ToTensor() ]) transform_test = Compose([ CenterCrop(200, 200), #Resize(224, 224), ToTensor() ]) train_dataset = IntracranialDataset(csv_file='train.csv', data_dir=dir_train_img, transform=transform_train, labels=True)
def get_test_transform(input_height, input_width):
    """Test-time pipeline: resize to (input_height, input_width), then tensor."""
    steps = [
        Resize(input_height, input_width),
        ToTensor(),
    ]
    return Compose(steps)