def get_data(scans, home_path, validation_patients, bs, size=512,
             normalize_stats=None, tfms=None):
    if tfms is None:
        tfms = get_transforms(
            do_flip=True,
            max_rotate=10.0,
            max_lighting=0,
            p_lighting=0,
            max_warp=0,
            max_zoom=1.2,
        )
    data = (SegmentationItemList.from_df(pd.DataFrame(scans), home_path)
            .split_by_valid_func(
                lambda path: any(p in str(path) for p in validation_patients))
            .label_from_func(get_y_fn, classes=CODES)
            .transform(tfms, size=size, tfm_y=True)
            .databunch(bs=bs)
            .normalize(normalize_stats))
    return data
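# Hedged usage sketch for get_data above: `scans` is assumed to be a list of
# slice paths and `validation_patients` a list of patient-ID substrings used
# by the split function. The paths and names below are hypothetical, and
# imagenet_stats is assumed to be imported from fastai.vision.
data = get_data(
    scans=['data/patient01/slice_000.png', 'data/patient02/slice_000.png'],
    home_path='.',
    validation_patients=['patient02'],
    bs=4,
    size=256,
    normalize_stats=imagenet_stats)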
def _get_data_bunch_segmentationitemlist(
    path: Union[Path, str],
    transform: bool,
    im_size: int,
    bs: int,
    classes: List[str],
) -> ImageDataBunch:
    """Create an ImageDataBunch and return it.

    TODO: a future version should allow users to pass in their own data bunch
    or their own Transform objects (instead of using fastai's
    get_transforms()).

    Args:
        path (Union[Path, str]): path to the data to build the databunch from
        transform (bool): flag to apply fastai's default transformations
            (get_transforms())
        im_size (int): image size of the databunch
        bs (int): batch size of the databunch
        classes (List[str]): class names for the segmentation masks

    Returns:
        ImageDataBunch
    """
    path = path if isinstance(path, Path) else Path(path)
    tfms = get_transforms() if transform else None
    im_path = path / "images"
    anno_path = path / "segmentation-masks"
    get_gt_filename = lambda x: anno_path / f"{x.stem}.png"

    # Load data
    return (SegmentationItemList.from_folder(im_path)
            .split_by_rand_pct(valid_pct=0.33)
            .label_from_func(get_gt_filename, classes=classes)
            .transform(tfms=tfms, size=im_size, tfm_y=True)
            .databunch(bs=bs, num_workers=db_num_workers())
            .normalize(imagenet_stats))
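# Hedged usage sketch for _get_data_bunch_segmentationitemlist: assumes a
# folder layout with "images" and "segmentation-masks" subdirectories under
# the given path; the path and class names below are placeholders.
data = _get_data_bunch_segmentationitemlist(
    "data/segmentation",
    transform=True,
    im_size=300,
    bs=16,
    classes=["background", "object"])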
def create_databunch(path_img, path_lbl, codes, input_size, bs, split_pct=0.2):
    """
    Creates a fastai databunch object to be passed into a Learner object.

    Parameters
    ----------------------------------------
    path_img : Path object
        Path to the directory containing the training images.
    path_lbl : Path object
        Path to the directory containing the labels for the above images.
        Each label should have the same filename as its corresponding image
        but with a .png file extension.
    codes : ndarray
        Contains the names of the segmented objects in your labels.
        The dtype of the array should be "<U17".
    input_size : tuple
        Contains the width and height of the input image accepted by your
        learner.
    bs : int
        Batch size.
    split_pct : float, optional
        The percentage of images that will be put into your validation set.
        Defaults to 20%.

    Returns
    ------------------------------------------
    data : Databunch
    """
    data = (SegmentationItemList.from_folder(path_img)
            .split_by_rand_pct(split_pct)
            .label_from_func(lambda x: path_lbl / f'{x.stem}.png',
                             classes=codes)
            .transform(get_transforms(flip_vert=True, max_warp=None),
                       tfm_y=True, size=input_size)
            .databunch(bs=bs)
            .normalize())
    return data
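# Hedged usage sketch for create_databunch: a minimal sketch assuming fastai
# v1's unet_learner, models, and dice metric are in scope; the paths and
# class codes below are placeholders.
import numpy as np
from pathlib import Path

codes = np.array(['background', 'building', 'road'], dtype='<U17')
data = create_databunch(Path('data/images'), Path('data/labels'), codes,
                        input_size=(256, 256), bs=4)
learn = unet_learner(data, models.resnet34, metrics=dice)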
def get_data(train_sampler=None):
    data = (SegmentationItemList.from_folder(chip_dir)
            .split_by_folder(train=train_img_dir, valid='val-img')
            .label_from_func(get_label_path, classes=classes)
            .transform(get_transforms(flip_vert=self.train_opts.flip_vert),
                       size=size, tfm_y=True)
            .databunch(bs=self.train_opts.batch_sz,
                       num_workers=num_workers,
                       train_sampler=train_sampler))
    return data
def create_training_dataset(self):
    """Creates a fastai segmentation dataset and stores it as an instance
    attribute.
    """
    logger.info("Creating training dataset from saved images.")
    src = (SegmentationItemList.from_folder(self.data_dir)
           .split_by_rand_pct()
           .label_from_func(self.get_label_name, classes=self.codes))
    self.data = (src.transform(get_transforms(), size=self.image_size,
                               tfm_y=True)
                 .databunch(bs=self.batch_size)
                 .normalize(imagenet_stats))
def tiny_seg_databunch(tiny_seg_data_path, seg_classes):
    """ Returns a databunch object for the segmentation tiny fridge objects
    dataset. """
    get_gt_filename = (
        lambda x: f"{tiny_seg_data_path}/segmentation-masks/{x.stem}.png")
    return (SegmentationItemList.from_folder(tiny_seg_data_path)
            .split_by_rand_pct(valid_pct=0.1, seed=10)
            .label_from_func(get_gt_filename, classes=seg_classes)
            .transform(get_transforms(), tfm_y=True, size=50)
            .databunch(bs=8, num_workers=db_num_workers())
            .normalize(imagenet_stats))
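# Hedged usage sketch: if tiny_seg_databunch is registered as a pytest
# fixture (its signature suggests it consumes other fixtures), a test could
# sanity-check the resulting databunch like this; the test name and
# assertions are hypothetical.
def test_tiny_seg_databunch(tiny_seg_databunch):
    x, y = tiny_seg_databunch.one_batch()
    assert x.shape[0] <= 8  # bs=8 set in the fixture
    assert len(tiny_seg_databunch.valid_ds) > 0  # 10% validation split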
def eval_model(path, codes, input_size, bs, learner, train_dirname='train',
               test_dirname='test', labels_dirname='labels'):
    """
    Evaluates the model on a test set.

    Parameters
    ------------------------------
    path : Path object
        Path to the testing directory. It should contain three
        subdirectories: one for the training images, one for the test images,
        and one for the labels of both sets.
    codes : ndarray
        Contains the names of the segmented objects in your labels.
        The dtype of the array should be "<U17".
    input_size : tuple
        Contains the width and height of the input image accepted by your
        learner.
    bs : int
        Batch size.
    learner : Learner
        Fastai Learner object. Should be a U-Net using the Dice metric.
    train_dirname : str
        Name of the subdirectory containing the training images.
    test_dirname : str
        Name of the subdirectory containing the test images.
    labels_dirname : str
        Name of the subdirectory containing the labels for both the training
        and the test set.

    Returns
    ------------------------------------------------------
    eval : tuple
        Contains the validation loss and the accuracy metric of the model on
        the test set.
    """
    data_test = (SegmentationItemList.from_folder(path)
                 .split_by_folder(train=train_dirname, valid=test_dirname)
                 .label_from_func(
                     lambda x: path / f'{labels_dirname}/{x.stem}.png',
                     classes=codes)
                 .transform(get_transforms(flip_vert=True, max_warp=None),
                            tfm_y=True, size=input_size)
                 .databunch(bs=bs)
                 .normalize())
    eval = learner.validate(data_test.valid_dl)
    return eval
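# Hedged usage sketch for eval_model: assumes `learn` is a trained U-Net
# Learner with a single Dice metric, so learner.validate() returns
# [loss, dice]; the path is a placeholder.
loss, dice_score = eval_model(
    Path('data/test_run'), codes, input_size=(256, 256), bs=4, learner=learn)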
def get_data(scans, valid_func, bs, size, label_func=None):
    if label_func is None:
        label_func = get_y_fn
    return (SegmentationItemList.from_df(
                pd.DataFrame(scans, columns=['files']), '.')
            .split_by_valid_func(valid_func)
            .label_from_func(label_func, classes=CODES)
            .transform(
                get_transforms(max_rotate=5.0, max_lighting=0,
                               p_lighting=0, max_warp=0),
                size=size,
                tfm_y=True,
            )
            # num_workers=0 loads data in the main process, for reproducibility
            .databunch(bs=bs, num_workers=0)
            .normalize(imagenet_stats))
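# Sketch of the seeding that usually accompanies num_workers=0 when full
# reproducibility is the goal (an assumption here, not part of the snippet
# above; standard Python/NumPy/PyTorch calls only).
import random

import numpy as np
import torch


def seed_everything(seed=1234):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)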
def get_databunch(data_dir, batch_sz=8, num_workers=4, sample_pct=1.0,
                  seed=1234):
    def get_y_fn(x):
        return join(str(x.parent) + 'annot', x.name)

    # Use an image from the test set to compute the source size, then train
    # at half resolution.
    fnames = get_image_files(join(data_dir, 'test'))
    img = open_image(fnames[0])
    src_size = np.array(img.data.shape[1:])
    size = src_size // 2

    data = (SegmentationItemList.from_folder(data_dir)
            .use_partial_data(sample_pct, seed)
            .split_by_folder(valid='val')
            .label_from_func(get_y_fn, classes=codes)
            .transform(get_transforms(), size=size, tfm_y=True)
            .databunch(bs=batch_sz, num_workers=num_workers)
            .normalize(imagenet_stats))
    return data
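# Hedged usage sketch for get_databunch: assumes a CamVid-style layout with
# 'test' and 'val' image folders and parallel '<split>annot' mask folders
# under data_dir; the path is a placeholder.
data = get_databunch('/opt/data/camvid/CamVid', batch_sz=4, sample_pct=0.5)
print(len(data.train_ds), len(data.valid_ds))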
import torchvision
from fastai.vision import (ImageDataBunch, cnn_learner, unet_learner,
                           SegmentationItemList, imagenet_stats)

data = ImageDataBunch.from_csv('fixtures/classification').normalize(
    imagenet_stats)
learner = cnn_learner(data, torchvision.models.resnet34)
learner.export()

data = (SegmentationItemList.from_folder('fixtures/segmentation/images')
        .split_none()
        .label_from_func(
            lambda x: f'fixtures/segmentation/masks/{x.stem}.jpg',
            classes=[0, 1, 2])
        .databunch()
        .normalize(imagenet_stats))
learner = unet_learner(data, torchvision.models.resnet50)
learner.export('../export.pkl')
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri, starts
    training (or resumes from a checkpoint), periodically syncs the contents
    of train_dir to train_uri, and syncs once more after training finishes.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    if 0 not in class_map.get_keys():
        classes = ['nodata'] + classes
    num_workers = 0 if self.train_opts.debug else 4

    data = (SegmentationItemList.from_folder(chip_dir)
            .split_by_folder(train='train-img', valid='val-img'))
    train_count = None
    if self.train_opts.train_count is not None:
        train_count = min(len(data.train), self.train_opts.train_count)
    elif self.train_opts.train_prop != 1.0:
        train_count = int(
            round(self.train_opts.train_prop * len(data.train)))
    train_items = data.train.items
    if train_count is not None:
        train_inds = np.random.permutation(
            np.arange(len(data.train)))[0:train_count]
        train_items = train_items[train_inds]
    items = np.concatenate([train_items, data.valid.items])

    data = (SegmentationItemList(items, chip_dir)
            .split_by_folder(train='train-img', valid='val-img')
            .label_from_func(get_label_path, classes=classes)
            .transform(get_transforms(flip_vert=self.train_opts.flip_vert),
                       size=size, tfm_y=True)
            .databunch(bs=self.train_opts.batch_sz,
                       num_workers=num_workers))
    print(data)

    # Setup learner.
    ignore_idx = 0
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = unet_learner(
        data, model_arch, metrics=metrics, wd=self.train_opts.weight_decay,
        bottle=True, path=train_dir)
    learn.unfreeze()

    if self.train_opts.mixed_prec and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.model.load_state_dict(
            torch.load(pretrained_path, map_location=learn.data.device),
            strict=False)

    # Save every epoch so that the resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    oversample = self.train_opts.oversample
    if oversample:
        weights = get_oversampling_weights(
            data.train_ds, oversample['rare_class_ids'],
            oversample['rare_target_prop'])
        oversample_callback = OverSamplingCallback(learn, weights=weights)
        callbacks.append(oversample_callback)

    if self.train_opts.debug:
        if oversample:
            oversample_callback.on_train_begin()
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    if self.train_opts.log_tensorboard:
        callbacks.append(TensorboardLogger(learn, 'run'))

    if self.train_opts.run_tensorboard:
        log.info('Starting tensorboard process')
        log_dir = join(train_dir, 'logs', 'run')
        tensorboard_process = Popen(
            ['tensorboard', '--logdir={}'.format(log_dir)])
        terminate_at_exit(tensorboard_process)

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            print('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    if self.train_opts.run_tensorboard:
        tensorboard_process.terminate()

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def train(test, s3_data, batch):
    """Train a segmentation model using fastai and PyTorch on the CamVid
    dataset.

    This will write to a CSV log after each epoch, sync output to S3, and
    resume training from a checkpoint.

    Note: this is an adaptation of
    https://github.com/fastai/course-v3/blob/master/nbs/dl1/lesson3-camvid-tiramisu.ipynb
    and uses the CamVid Tiramisu-subset dataset described in the fast.ai
    course, at half-resolution. It takes about a minute to get to around 90%
    accuracy on a p3.2xlarge.
    """
    if batch:
        run_on_batch()

    # Setup hyperparams.
    bs = 8
    wd = 1e-2
    lr = 2e-3
    num_epochs = 10
    sample_pct = 1.0
    model_arch = models.resnet34
    fp16 = False
    sync_interval = 20  # Don't sync during training for such a small job.
    seed = 1234

    if test:
        bs = 1
        num_epochs = 2
        sample_pct = 0.01
        model_arch = models.resnet18

    # Setup paths.
    data_uri = Path('/opt/data/camvid/CamVid')
    train_uri = Path('/opt/data/camvid/train')
    data_dir = data_uri
    train_dir = train_uri
    if s3_data:
        temp_dir_obj = tempfile.TemporaryDirectory()
        data_uri = 's3://raster-vision-lf-dev/camvid/CamVid.zip'
        train_uri = 's3://raster-vision-lf-dev/camvid/train'
        train_dir = Path(temp_dir_obj.name) / 'train'
        data_dir = Path(temp_dir_obj.name) / 'data'
        make_dir(train_dir)
        make_dir(data_dir)

    # Retrieve data and remote training directory.
    if s3_data:
        print('Downloading data...')
        data_zip = Path(temp_dir_obj.name) / 'CamVid.zip'
        s3_utils.copy_from(data_uri, str(data_zip))
        zip_ref = zipfile.ZipFile(data_zip, 'r')
        zip_ref.extractall(data_dir)
        zip_ref.close()
        data_dir = data_dir / 'CamVid'

        if s3_utils.list_paths(train_uri):
            print('Syncing train dir...')
            s3_utils.sync_to_dir(train_uri, str(train_dir))

    # Setup data loader.
    def get_y_fn(x):
        return Path(str(x.parent) + 'annot') / x.name

    fnames = get_image_files(data_dir / 'val')
    img = open_image(fnames[0])
    src_size = np.array(img.data.shape[1:])
    size = src_size // 2

    data = (SegmentationItemList.from_folder(data_dir)
            .use_partial_data(sample_pct, seed)
            .split_by_folder(valid='val')
            .label_from_func(get_y_fn, classes=codes)
            .transform(get_transforms(), size=size, tfm_y=True)
            .databunch(bs=bs)
            .normalize(imagenet_stats))

    # Setup metrics, callbacks, and then train model.
    metrics = [acc_camvid]
    model_path = train_dir / 'stage-1'
    log_path = train_dir / 'log'
    learn = unet_learner(data, model_arch, metrics=metrics, wd=wd,
                         bottle=True)
    learn.unfreeze()

    if fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    start_epoch = 1
    if os.path.isfile(str(model_path) + '.pth'):
        print('Loading saved model...')
        start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
        if start_epoch > num_epochs:
            print('Training already done. If you would like to re-train, '
                  'delete previous results of training in '
                  '{}.'.format(train_dir))
            exit()

        learn.load(model_path)
        print('Resuming from epoch {}'.format(start_epoch))
        print('Note: fastai does not support a start_epoch, so epoch 1 below '
              'corresponds to {}'.format(start_epoch))

    callbacks = [
        SaveModelCallback(learn, name=model_path),
        MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch)
    ]
    if s3_data:
        callbacks.append(S3SyncCallback(train_dir, train_uri, sync_interval))

    epochs_left = num_epochs - start_epoch + 1
    lrs = slice(lr / 100, lr)
    learn.fit_one_cycle(epochs_left, lrs, pct_start=0.8, callbacks=callbacks)

    if s3_data:
        s3_utils.sync_from_dir(train_dir, train_uri)
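# Hedged usage sketch: a quick local smoke test of the train() script above.
# This assumes the function can be called directly; if it is wrapped as a CLI
# command elsewhere, invoke it through the CLI instead.
train(test=True, s3_data=False, batch=False)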
def train(self, tmp_dir):
    """Train a model."""
    # Setup hyperparams.
    bs = int(self.config.get('bs', 8))
    wd = self.config.get('wd', 1e-2)
    lr = self.config.get('lr', 2e-3)
    num_epochs = int(self.config.get('num_epochs', 10))
    model_arch = self.config.get('model_arch', 'resnet50')
    model_arch = getattr(models, model_arch)
    fp16 = self.config.get('fp16', False)
    sync_interval = self.config.get('sync_interval', 1)
    debug = self.config.get('debug', False)

    chip_uri = self.config['chip_uri']
    train_uri = self.config['train_uri']

    # Sync output of previous training run from cloud.
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    classes = ['nodata'] + self.task_config.class_map.get_class_names()
    data = (SegmentationItemList.from_folder(chip_dir)
            .split_by_folder(train='train-img', valid='val-img')
            .label_from_func(get_label_path, classes=classes)
            .transform(get_transforms(), size=size, tfm_y=True)
            .databunch(bs=bs))
    print(data)

    if debug:
        # We make debug chips during the run-time of the train command
        # rather than the chip command because this is a better test
        # (see "visualize just before the net" in
        # https://karpathy.github.io/2019/04/25/recipe/), and because it's
        # more convenient since we have the databunch here.
        # TODO make color map based on colors in class_map
        # TODO get rid of white frame
        # TODO zip them
        def _make_debug_chips(split):
            debug_chips_dir = join(train_uri,
                                   '{}-debug-chips'.format(split))
            make_dir(debug_chips_dir)
            ds = data.train_ds if split == 'train' else data.valid_ds
            for i, (x, y) in enumerate(ds):
                x.show(y=y)
                plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
                plt.close()

        _make_debug_chips('train')
        _make_debug_chips('val')

    # Setup learner.
    metrics = [semseg_acc]
    learn = unet_learner(data, model_arch, metrics=metrics, wd=wd,
                         bottle=True)
    learn.unfreeze()

    if fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup ability to resume training if a saved model exists. This hack
    # won't properly set the learning rate as a function of epochs when
    # resuming.
    learner_path = join(train_dir, 'learner.pth')
    log_path = join(train_dir, 'log')

    start_epoch = 0
    if isfile(learner_path):
        print('Loading saved model...')
        start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
        if start_epoch >= num_epochs:
            print('Training is already done. If you would like to re-train, '
                  'delete the previous results of training in '
                  '{}.'.format(train_uri))
            return

        learn.load(learner_path[:-4])
        print('Resuming from epoch {}'.format(start_epoch))
        print('Note: fastai does not support a start_epoch, so epoch 0 below '
              'corresponds to {}'.format(start_epoch))
    epochs_left = num_epochs - start_epoch

    # Setup callbacks and train model.
    callbacks = [
        SaveModelCallback(learn, name=learner_path[:-4]),
        MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch),
        SyncCallback(train_dir, train_uri, sync_interval)
    ]
    learn.fit(epochs_left, lr, callbacks=callbacks)

    # Export model for inference.
    model_uri = self.config['model_uri']
    model_path = get_local_path(model_uri, tmp_dir)
    learn.export(model_path)

    # Sync output to cloud.
    sync_to_dir(train_dir, train_uri)
reset_index().rename(columns={'index': 'img_id', 'Image_Label': 'count'})

all_dice_scores = []
test_preds = np.zeros(
    (len(unique_test_images), len(codes), size[0], size[1]),
    dtype=np.float32)

# Loss metrics
dice_50 = partial(multiclass_dice_threshold, threshold=0.50)

currentFold = 0
for train_index, valid_index in skf.split(id_mask_count['img_id'].values,
                                          id_mask_count['count']):
    src = (SegmentationItemList.from_df(
               unique_images, DATA / ('train_images' + str(SUFFIX)),
               cols='im_id')
           .split_by_idx(valid_index)
           .label_from_func(get_y_fn, classes=codes))
    transforms = get_transforms(max_warp=0, max_rotate=0)
    data = (src.transform(transforms, tfm_y=True,
                          size=training_image_size,
                          resize_method=ResizeMethod.PAD,
                          padding_mode="zeros")
            .databunch(bs=batch_size)
            .normalize(imagenet_stats))
    learn = unet_learner(data, models.resnet34, pretrained=True,
                         metrics=[multiclass_dice, dice_50],
                         loss_func=BCEDiceLoss(),