def get_datasets(
    self,
    stage: str,
    datapath: str = None,
    in_csv: str = None,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    train_folds: str = None,
    valid_folds: str = None,
    tag2class: str = None,
    class_column: str = None,
    tag_column: str = None,
    folds_seed: int = 42,
    n_folds: int = 5,
    image_size: int = 256,
):
    datasets = collections.OrderedDict()
    tag2class = (
        json.load(open(tag2class)) if tag2class is not None else None
    )

    df, df_train, df_valid, df_infer = read_csv_data(
        in_csv=in_csv,
        in_csv_train=in_csv_train,
        in_csv_valid=in_csv_valid,
        in_csv_infer=in_csv_infer,
        train_folds=train_folds,
        valid_folds=valid_folds,
        tag2class=tag2class,
        class_column=class_column,
        tag_column=tag_column,
        seed=folds_seed,
        n_folds=n_folds,
    )

    open_fn = ReaderCompose(readers=[
        ImageReader(input_key="images", output_key="image", datapath=datapath),
        MaskReader(input_key="masks", output_key="mask", datapath=datapath),
        ScalarReader(
            input_key="name",
            output_key="name",
            default_value=-1,
            dtype=str,
        ),
    ])

    for mode, source in zip(
        ("train", "valid", "infer"), (df_train, df_valid, df_infer)
    ):
        if source is not None and len(source) > 0:
            datasets[mode] = ListDataset(
                list_data=source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(
                    stage=stage, mode=mode, image_size=image_size
                ),
            )

    return datasets
def get_datasets(
    self,
    subvolume_shape: List[int],
    volume_shape: List[int],
    stage: str,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    n_samples: int = 100,
    max_batch_size: int = 3,
):
    """
    Args:
        subvolume_shape: dimensions of a subvolume
        volume_shape: dimensions of a full volume
        stage (str): stage name
        in_csv_train (str): path to CSV file with train samples
        in_csv_valid (str): path to CSV file with validation samples
        in_csv_infer (str): path to CSV file with inference samples
    """
    df, df_train, df_valid, df_infer = read_csv_data(
        in_csv_train=in_csv_train,
        in_csv_valid=in_csv_valid,
        in_csv_infer=in_csv_infer,
    )

    datasets = {}
    open_fn = ReaderCompose(readers=[
        NiftiReader_Image(input_key="images", output_key="images"),
        NiftiReader_Mask(input_key="labels", output_key="labels"),
    ])

    for mode, source in zip(("train", "valid"), (df_train, df_valid)):
        if source is not None and len(source) > 0:
            datasets[mode] = {
                "dataset": BrainDataset(
                    list_data=source,
                    list_shape=volume_shape,
                    list_sub_shape=subvolume_shape,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(stage=stage, mode=mode),
                    mode=mode,
                    n_samples=n_samples,
                    input_key="images",
                    output_key="labels",
                ),
                "collate_fn": CollateGeneratorFn(max_batch_size),
            }

    return datasets
def get_reader(num_classes: int = 2) -> ReaderCompose:
    return ReaderCompose([
        ImageReader(input_key="filepath", output_key="image", rootpath="."),
        ScalarReader(
            input_key="label",
            output_key="targets",
            default_value=-1,
            dtype=np.int64,
        ),
        ScalarReader(
            input_key="label",
            output_key="targets_one_hot",
            default_value=-1,
            dtype=np.int64,
            one_hot_classes=num_classes,
        ),
    ])
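# Usage sketch (assumed, not from the original code): a ReaderCompose instance
# is a callable that maps one annotation row (a dict) to a training sample.
# The file path and label below are hypothetical placeholders.
open_fn = get_reader(num_classes=2)
sample = open_fn({"filepath": "images/cat_001.jpg", "label": 1})
# "sample" now holds "image" (np.ndarray), "targets" (np.int64 scalar)
# and "targets_one_hot" (one-hot vector of length num_classes)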
def get_open_fn(self, dataset_info, datapath, datapath_prefix=None):
    if datapath_prefix is not None:
        datapath = os.path.join(datapath, datapath_prefix)

    open_fn = ReaderCompose(readers=[
        FineImageReader(input_key="image", output_key="image", datapath=datapath),
        object_file_readers[dataset_info["objects_format"]](
            input_key="objects",
            output_key="objects",
            datapath=datapath,
            markup_name2class_name=self.tm_info.alias2name,
            markup_name2id=self.tm_info.alias2dc,
        ),
        ScalarReader(
            input_key="ID",
            output_key="image_name",
            default_value=-1,
            dtype=str,
        ),
    ])
    return open_fn
def get_datasets(
    self,
    stage: str,
    datapath: str = None,
    in_csv: str = None,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    train_folds: str = None,
    valid_folds: str = None,
    tag2class: str = None,
    class_column: str = None,
    tag_column: str = None,
    folds_seed: int = 42,
    n_folds: int = 5,
    one_hot_classes: int = None,
    balance_strategy: str = "upsampling",
):
    datasets = collections.OrderedDict()
    tag2class = safitty.load(tag2class) if tag2class is not None else None

    df, df_train, df_valid, df_infer = read_csv_data(
        in_csv=in_csv,
        in_csv_train=in_csv_train,
        in_csv_valid=in_csv_valid,
        in_csv_infer=in_csv_infer,
        train_folds=train_folds,
        valid_folds=valid_folds,
        tag2class=tag2class,
        class_column=class_column,
        tag_column=tag_column,
        seed=folds_seed,
        n_folds=n_folds,
    )

    open_fn = [
        ImageReader(input_key="filepath", output_key="image", rootpath=datapath)
    ]
    if stage.startswith("infer"):
        open_fn.append(
            ScalarReader(
                input_key="filepath",
                output_key="filepath",
                default_value=-1,
                dtype=str,
            )
        )
    else:
        open_fn.append(
            ScalarReader(
                input_key="class",
                output_key="targets",
                default_value=-1,
                dtype=np.int64,
            )
        )
        if one_hot_classes:
            open_fn.append(
                ScalarReader(
                    input_key="class",
                    output_key="targets_one_hot",
                    default_value=-1,
                    dtype=np.int64,
                    one_hot_classes=one_hot_classes,
                )
            )
    open_fn = ReaderCompose(readers=open_fn)

    for source, mode in zip(
        (df_train, df_valid, df_infer), ("train", "valid", "infer")
    ):
        if source is not None and len(source) > 0:
            dataset = ListDataset(
                source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(stage=stage, dataset=mode),
            )
            if mode == "train":
                labels = [x["class"] for x in source]
                sampler = BalanceClassSampler(labels, mode=balance_strategy)
                dataset = {"dataset": dataset, "sampler": sampler}
            datasets[mode] = dataset

    if stage == "infer":
        datasets["infer"] = datasets["valid"]
        del datasets["valid"]
        if "train" in datasets:
            del datasets["train"]

    return datasets
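# Hedged usage sketch (assumption, not from the original code): "experiment" stands
# for an instance of the class defining get_datasets above, and the paths are
# placeholders. It shows how the "dataset"/"sampler" dict returned for the train
# split can be turned into a DataLoader.
from torch.utils.data import DataLoader

datasets = experiment.get_datasets(
    stage="train",
    datapath="./data/images",
    in_csv="./data/labels.csv",  # expected to contain "filepath" and "class" columns
)
train_entry = datasets["train"]  # {"dataset": ListDataset, "sampler": BalanceClassSampler}
loader = DataLoader(
    train_entry["dataset"],
    sampler=train_entry["sampler"],
    batch_size=64,
    num_workers=4,
)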
def get_datasets(
    self,
    stage: str,
    datapath: Optional[str] = None,
    in_csv: Optional[str] = None,
    in_csv_train: Optional[str] = None,
    in_csv_valid: Optional[str] = None,
    in_csv_infer: Optional[str] = None,
    train_folds: Optional[str] = None,
    valid_folds: Optional[str] = None,
    tag2class: Optional[str] = None,
    class_column: Optional[str] = None,
    tag_column: Optional[str] = None,
    folds_seed: int = 42,
    n_folds: int = 5,
):
    """Returns the datasets for a given stage and epoch.

    Args:
        stage (str): stage name of interest,
            e.g. "pretrain" / "train" / "finetune"
        datapath (str): path to the folder with images and masks
        in_csv (Optional[str]): path to CSV annotation file. See
            :func:`catalyst.contrib.utils.pandas.read_csv_data` for details
        in_csv_train (Optional[str]): path to CSV annotation file
            with train samples
        in_csv_valid (Optional[str]): path to CSV annotation file
            with validation samples
        in_csv_infer (Optional[str]): path to CSV annotation file
            with test samples
        train_folds (Optional[str]): folds to use for training
        valid_folds (Optional[str]): folds to use for validation
        tag2class (Optional[str]): path to JSON file with mapping
            from class name (tag) to class index
        class_column (Optional[str]): name of the class index column
            in the CSV file
        tag_column (Optional[str]): name of the class name (tag) column
            in the CSV file
        folds_seed (int): random seed to use
        n_folds (int): number of folds to split the data into

    Returns:
        Dict: dictionary with datasets for the current stage.
    """
    datasets = collections.OrderedDict()
    tag2class = (
        json.load(open(tag2class)) if tag2class is not None else None
    )

    df, df_train, df_valid, df_infer = read_csv_data(
        in_csv=in_csv,
        in_csv_train=in_csv_train,
        in_csv_valid=in_csv_valid,
        in_csv_infer=in_csv_infer,
        train_folds=train_folds,
        valid_folds=valid_folds,
        tag2class=tag2class,
        class_column=class_column,
        tag_column=tag_column,
        seed=folds_seed,
        n_folds=n_folds,
    )

    open_fn = ReaderCompose(
        readers=[
            ImageReader(
                input_key="images", output_key="image", rootpath=datapath
            ),
            MaskReader(
                input_key="masks", output_key="mask", rootpath=datapath
            ),
            ScalarReader(
                input_key="name",
                output_key="name",
                dtype=str,
                default_value=-1,
            ),
        ]
    )

    for mode, source in zip(
        ("train", "valid", "infer"), (df_train, df_valid, df_infer)
    ):
        if source is not None and len(source) > 0:
            datasets[mode] = ListDataset(
                list_data=source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(stage=stage, dataset=mode),
            )

    return datasets
def get_datasets(
    self,
    stage: str,
    datapath: str = None,
    in_csv: str = None,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    train_folds: str = None,
    valid_folds: str = None,
    tag2class: str = None,
    class_column: str = None,
    tag_column: str = None,
    folds_seed: int = 42,
    n_folds: int = 5,
    image_size: int = 256,
):
    datasets = collections.OrderedDict()
    tag2class = (
        json.load(open(tag2class)) if tag2class is not None else None
    )

    df, df_train, df_valid, df_infer = read_csv_data(
        in_csv=in_csv,
        in_csv_train=in_csv_train,
        in_csv_valid=in_csv_valid,
        in_csv_infer=in_csv_infer,
        train_folds=train_folds,
        valid_folds=valid_folds,
        tag2class=tag2class,
        class_column=class_column,
        tag_column=tag_column,
        seed=folds_seed,
        n_folds=n_folds,
    )

    import cv2
    import os

    def encode_fn_lambda(fname, datapath):
        # read the mask as grayscale and binarize it to {0, 1},
        # keeping a trailing channel dimension
        mask = cv2.imread(os.path.join(datapath, fname))
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) // 255
        return mask[:, :, None]

    open_fn = ReaderCompose(readers=[
        ImageReader(input_key="images", output_key="image", datapath=datapath),
        LambdaReader(
            input_key="masks",
            output_key="mask",
            datapath=datapath,
            encode_fn=encode_fn_lambda,
        ),
        ScalarReader(
            input_key="name",
            output_key="name",
            default_value=-1,
            dtype=str,
        ),
    ])

    for mode, source in zip(
        ("train", "valid", "infer"), (df_train, df_valid, df_infer)
    ):
        if source is not None and len(source) > 0:
            datasets[mode] = ListDataset(
                list_data=source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(
                    stage=stage, mode=mode, image_size=image_size
                ),
            )

    return datasets
def get_loaders(
    random_state: int,
    volume_shape: List[int],
    subvolume_shape: List[int],
    train_subvolumes: int = 128,
    infer_subvolumes: int = 512,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    batch_size: int = 16,
    num_workers: int = 10,
) -> Tuple[dict, dict]:
    """Get dataloaders for training, validation and inference."""
    datasets = {}
    open_fn = ReaderCompose([
        NiftiFixedVolumeReader(input_key="images", output_key="images"),
        NiftiReader(input_key="nii_labels", output_key="targets"),
    ])

    for mode, source in zip(
        ("train", "validation", "infer"),
        (in_csv_train, in_csv_valid, in_csv_infer),
    ):
        if mode == "infer":
            n_subvolumes = infer_subvolumes
        else:
            n_subvolumes = train_subvolumes

        if source is not None and len(source) > 0:
            dataset = BrainDataset(
                list_data=dataframe_to_list(pd.read_csv(source)),
                list_shape=volume_shape,
                list_sub_shape=subvolume_shape,
                open_fn=open_fn,
                n_subvolumes=n_subvolumes,
                mode=mode,
                input_key="images",
                output_key="targets",
            )
            datasets[mode] = {"dataset": dataset}

    def worker_init_fn(worker_id):
        # give each worker a distinct, reproducible numpy seed
        np.random.seed(np.random.get_state()[1][0] + worker_id)

    train_loader = DataLoader(
        dataset=datasets["train"]["dataset"],
        batch_size=batch_size,
        shuffle=True,
        worker_init_fn=worker_init_fn,
        num_workers=num_workers,
        pin_memory=True,
    )
    valid_loader = DataLoader(
        dataset=datasets["validation"]["dataset"],
        batch_size=batch_size,
        shuffle=True,
        worker_init_fn=worker_init_fn,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )
    test_loader = DataLoader(
        dataset=datasets["infer"]["dataset"],
        batch_size=batch_size,
        worker_init_fn=worker_init_fn,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    train_loaders = collections.OrderedDict()
    infer_loaders = collections.OrderedDict()
    train_loaders["train"] = BatchPrefetchLoaderWrapper(train_loader)
    train_loaders["valid"] = BatchPrefetchLoaderWrapper(valid_loader)
    infer_loaders["infer"] = BatchPrefetchLoaderWrapper(test_loader)

    return train_loaders, infer_loaders
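# Hedged usage sketch (not part of the original module): the CSV paths and shapes
# below are placeholders. get_loaders returns two ordered dicts of prefetching
# loaders -- one with "train"/"valid" for training, one with "infer" for inference.
train_loaders, infer_loaders = get_loaders(
    random_state=42,
    volume_shape=[256, 256, 256],
    subvolume_shape=[64, 64, 64],
    in_csv_train="./data/dataset_train.csv",
    in_csv_valid="./data/dataset_valid.csv",
    in_csv_infer="./data/dataset_infer.csv",
    batch_size=16,
    num_workers=10,
)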