Code example #1
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        datasets = collections.OrderedDict()
        if tag2class is not None:
            with open(tag2class) as fin:  # close the file handle promptly
                tag2class = json.load(fin)

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            MaskReader(input_key="masks", output_key="mask",
                       datapath=datapath),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        for mode, source in zip(("train", "valid", "infer"),
                                (df_train, df_valid, df_infer)):
            if source is not None and len(source) > 0:
                datasets[mode] = ListDataset(
                    list_data=source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(stage=stage,
                                                       mode=mode,
                                                       image_size=image_size),
                )

        return datasets
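
A hypothetical invocation of this method; the CSV layout, paths, and the `experiment` instance below are placeholders, not anything from the original project. The CSV needs at least the columns the readers reference ("images", "masks", "name"):

    # dataset.csv (assumed layout):
    #   images,masks,name
    #   img/0001.png,msk/0001.png,0001
    datasets = experiment.get_datasets(
        stage="train",
        datapath="./data",
        in_csv="./data/dataset.csv",
    )
    train_dataset = datasets["train"]  # a ListDataset, ready for a DataLoader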
Code example #2
    def get_datasets(
        self,
        subvolume_shape: List[int],
        volume_shape: List[int],
        stage: str,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        n_samples: int = 100,
        max_batch_size: int = 3,
    ):
        """
        Args:
            subvolume_shape: dimention of subvolume
            volume_shape: dimention of volume
            stage (str)
            in_csv_train (str)
            in_csv_valid (str)
            in_csv_infer (str)
        """
        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
        )

        datasets = {}
        open_fn = ReaderCompose(readers=[
            NiftiReader_Image(input_key="images", output_key="images"),
            NiftiReader_Mask(input_key="labels", output_key="labels"),
        ])

        for mode, source in zip(("train", "valid"), (df_train, df_valid)):
            if source is not None and len(source) > 0:

                datasets[mode] = {
                    "dataset":
                    BrainDataset(
                        list_data=source,
                        list_shape=volume_shape,
                        list_sub_shape=subvolume_shape,
                        open_fn=open_fn,
                        dict_transform=self.get_transforms(stage=stage,
                                                           mode=mode),
                        mode=mode,
                        n_samples=n_samples,
                        input_key="images",
                        output_key="labels",
                    ),
                    "collate_fn":
                    CollateGeneratorFn(max_batch_size),
                }

        return datasets
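
Catalyst-style experiments consume per-mode dicts like the one returned above; as a minimal sketch of the equivalent manual wiring (the `experiment` instance, shapes, and CSV path are assumptions):

    from torch.utils.data import DataLoader

    entry = experiment.get_datasets(
        subvolume_shape=[64, 64, 64],
        volume_shape=[256, 256, 256],
        stage="train",
        in_csv_train="./train.csv",
    )["train"]
    loader = DataLoader(
        entry["dataset"],
        collate_fn=entry["collate_fn"],  # the CollateGeneratorFn built above
    )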
Code example #3
File: dataset.py  Project: catalyst-team/dl-course
def get_reader(num_classes: int = 2) -> ReaderCompose:
    return ReaderCompose([
        ImageReader(input_key="filepath", output_key="image", rootpath="."),
        ScalarReader(
            input_key="label",
            output_key="targets",
            default_value=-1,
            dtype=np.int64,
        ),
        ScalarReader(
            input_key="label",
            output_key="targets_one_hot",
            default_value=-1,
            dtype=np.int64,
            one_hot_classes=num_classes,
        ),
    ])
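
Because a `ReaderCompose` instance is just a callable over a single annotation row, the reader can be smoke-tested directly; the file path and label below are made up:

    reader = get_reader(num_classes=2)
    sample = reader({"filepath": "images/0001.jpg", "label": 1})
    print(sorted(sample))  # expected keys: "image", "targets", "targets_one_hot"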
Code example #4
    def get_open_fn(self, dataset_info, datapath, datapath_prefix=None):
        if datapath_prefix is not None:
            datapath = os.path.join(datapath, datapath_prefix)
        open_fn = ReaderCompose(readers=[
            FineImageReader(
                input_key="image", output_key="image", datapath=datapath),
            object_file_readers[dataset_info["objects_format"]](
                input_key="objects",
                output_key="objects",
                datapath=datapath,
                markup_name2class_name=self.tm_info.alias2name,
                markup_name2id=self.tm_info.alias2dc),
            ScalarReader(
                input_key="ID",
                output_key="image_name",
                default_value=-1,
                dtype=str,
            ),
        ])
        return open_fn
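
The method above leans on project-specific pieces (`object_file_readers`, `self.tm_info`), but its result is again a row-to-dict callable. A hypothetical call from inside the same class, with every literal below a placeholder:

    open_fn = self.get_open_fn(
        dataset_info={"objects_format": "coco"},  # assumed format key
        datapath="./data",
        datapath_prefix="train",
    )
    sample = open_fn({"image": "0001.jpg", "objects": "0001.json", "ID": "0001"})
    print(sorted(sample))  # expected keys: "image", "image_name", "objects"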
Code example #5
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        one_hot_classes: int = None,
        balance_strategy: str = "upsampling",
    ):
        datasets = collections.OrderedDict()
        tag2class = safitty.load(tag2class) if tag2class is not None else None

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = [
            ImageReader(
                input_key="filepath", output_key="image", rootpath=datapath
            )
        ]

        if stage.startswith("infer"):
            open_fn.append(ScalarReader(
                input_key="filepath",
                output_key="filepath",
                default_value=-1,
                dtype=str,
            ))
        else:
            open_fn.append(ScalarReader(
                input_key="class",
                output_key="targets",
                default_value=-1,
                dtype=np.int64,
            ))

            if one_hot_classes:
                open_fn.append(
                    ScalarReader(
                        input_key="class",
                        output_key="targets_one_hot",
                        default_value=-1,
                        dtype=np.int64,
                        one_hot_classes=one_hot_classes,
                    )
                )

        open_fn = ReaderCompose(readers=open_fn)

        for source, mode in zip(
            (df_train, df_valid, df_infer), ("train", "valid", "infer")
        ):
            if source is not None and len(source) > 0:
                dataset = ListDataset(
                    source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(
                        stage=stage, dataset=mode
                    ),
                )
                if mode == "train":
                    labels = [x["class"] for x in source]
                    sampler = BalanceClassSampler(
                        labels, mode=balance_strategy
                    )
                    dataset = {"dataset": dataset, "sampler": sampler}
                datasets[mode] = dataset

        if stage == "infer":
            datasets["infer"] = datasets["valid"]
            del datasets["valid"]
            if "train" in datasets:
                del datasets["train"]

        return datasets
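
When a mode maps to a {"dataset", "sampler"} pair as above, the sampler replaces shuffling in the eventual loader (PyTorch forbids combining `sampler=` with `shuffle=True`). A minimal sketch of the manual wiring, assuming the dict built above:

    from torch.utils.data import DataLoader

    entry = datasets["train"]
    loader = DataLoader(
        entry["dataset"],
        sampler=entry["sampler"],  # the BalanceClassSampler from above
        batch_size=32,
    )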
Code example #6
    def get_datasets(
        self,
        stage: str,
        datapath: Optional[str] = None,
        in_csv: Optional[str] = None,
        in_csv_train: Optional[str] = None,
        in_csv_valid: Optional[str] = None,
        in_csv_infer: Optional[str] = None,
        train_folds: Optional[str] = None,
        valid_folds: Optional[str] = None,
        tag2class: Optional[str] = None,
        class_column: Optional[str] = None,
        tag_column: Optional[str] = None,
        folds_seed: int = 42,
        n_folds: int = 5,
    ):
        """Returns the datasets for a given stage and epoch.

        Args:
            stage (str): stage name of interest,
                like "pretrain" / "train" / "finetune" / etc
            datapath (str): path to folder with images and masks
            in_csv (Optional[str]): path to CSV annotation file. Look at
                :func:`catalyst.contrib.utils.pandas.read_csv_data` for details
            in_csv_train (Optional[str]): path to CSV annotation file
                with train samples
            in_csv_valid (Optional[str]): path to CSV annotation file
                with the validation samples
            in_csv_infer (Optional[str]): path to CSV annotation file
                with test samples
            train_folds (Optional[str]): folds to use for training
            valid_folds (Optional[str]): folds to use for validation
            tag2class (Optional[str]): path to JSON file with mapping from
                class name (tag) to index
            class_column (Optional[str]): name of the class index column
                in the CSV
            tag_column (Optional[str]): name of the column with class
                names (tags) in the CSV file
            folds_seed (int): random seed to use
            n_folds (int): number of folds to split the data into

        Returns:
            Dict: dictionary with datasets for current stage.
        """
        datasets = collections.OrderedDict()
        if tag2class is not None:
            with open(tag2class) as fin:  # close the file handle promptly
                tag2class = json.load(fin)

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = ReaderCompose(
            readers=[
                ImageReader(
                    input_key="images", output_key="image", rootpath=datapath
                ),
                MaskReader(
                    input_key="masks", output_key="mask", rootpath=datapath
                ),
                ScalarReader(
                    input_key="name",
                    output_key="name",
                    dtype=str,
                    default_value=-1,
                ),
            ]
        )

        for mode, source in zip(
            ("train", "valid", "infer"), (df_train, df_valid, df_infer)
        ):
            if source is not None and len(source) > 0:
                datasets[mode] = ListDataset(
                    list_data=source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(
                        stage=stage, dataset=mode
                    ),
                )

        return datasets
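
The `tag2class` file mentioned in the docstring is a flat JSON object mapping class names (tags) to integer indices; a made-up example of producing one:

    import json

    # Hypothetical contents: tag -> class index.
    with open("tag2class.json", "w") as fout:
        json.dump({"cat": 0, "dog": 1}, fout)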
Code example #7
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        datasets = collections.OrderedDict()
        if tag2class is not None:
            with open(tag2class) as fin:  # close the file handle promptly
                tag2class = json.load(fin)

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        import cv2
        import os

        def encode_fn_lambda(fname, datapath):
            # Read the mask, collapse BGR to grayscale, binarize 0/255 -> 0/1,
            # then append a trailing channel axis: (H, W) -> (H, W, 1).
            return (cv2.cvtColor(cv2.imread(os.path.join(datapath, fname)),
                                 cv2.COLOR_BGR2GRAY) // 255)[:, :, None]

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            LambdaReader(input_key="masks",
                         output_key="mask",
                         datapath=datapath,
                         encode_fn=encode_fn_lambda),
            # MaskReader(
            #     input_key="masks", output_key="mask", datapath=datapath
            # ),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        for mode, source in zip(("train", "valid", "infer"),
                                (df_train, df_valid, df_infer)):
            if source is not None and len(source) > 0:
                datasets[mode] = ListDataset(
                    list_data=source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(stage=stage,
                                                       mode=mode,
                                                       image_size=image_size),
                )

        return datasets
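
If the custom decoder were lifted to module scope, it would be easy to sanity-check what `LambdaReader` hands to the pipeline; the path below is a placeholder:

    mask = encode_fn_lambda("masks/0001.png", datapath="./data")
    print(mask.shape, mask.dtype, mask.max())  # expect (H, W, 1), uint8, max 1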
Code example #8
def get_loaders(
    random_state: int,
    volume_shape: List[int],
    subvolume_shape: List[int],
    train_subvolumes: int = 128,
    infer_subvolumes: int = 512,
    in_csv_train: str = None,
    in_csv_valid: str = None,
    in_csv_infer: str = None,
    batch_size: int = 16,
    num_workers: int = 10,
) -> Tuple[dict, dict]:
    """Get dataloaders for the train/valid/infer splits."""
    datasets = {}
    open_fn = ReaderCompose([
        NiftiFixedVolumeReader(input_key="images", output_key="images"),
        NiftiReader(input_key="nii_labels", output_key="targets"),
    ])

    for mode, source in zip(
        ("train", "validation", "infer"),
        (in_csv_train, in_csv_valid, in_csv_infer),
    ):
        if mode == "infer":
            n_subvolumes = infer_subvolumes
        else:
            n_subvolumes = train_subvolumes

        if source is not None and len(source) > 0:
            dataset = BrainDataset(
                list_data=dataframe_to_list(pd.read_csv(source)),
                list_shape=volume_shape,
                list_sub_shape=subvolume_shape,
                open_fn=open_fn,
                n_subvolumes=n_subvolumes,
                mode=mode,
                input_key="images",
                output_key="targets",
            )
            # Register a dataset only for splits that have a source CSV.
            datasets[mode] = {"dataset": dataset}

    def worker_init_fn(worker_id):
        # Re-seed NumPy in each worker so random augmentations differ
        # across workers instead of repeating the parent process stream.
        np.random.seed(np.random.get_state()[1][0] + worker_id)

    train_loader = DataLoader(
        dataset=datasets["train"]["dataset"],
        batch_size=batch_size,
        shuffle=True,
        worker_init_fn=worker_init_fn,
        num_workers=num_workers,
        pin_memory=True,
    )
    valid_loader = DataLoader(
        dataset=datasets["validation"]["dataset"],
        shuffle=True,
        worker_init_fn=worker_init_fn,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )
    test_loader = DataLoader(
        dataset=datasets["infer"]["dataset"],
        batch_size=batch_size,
        worker_init_fn=worker_init_fn,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )
    train_loaders = collections.OrderedDict()
    infer_loaders = collections.OrderedDict()
    train_loaders["train"] = BatchPrefetchLoaderWrapper(train_loader)
    train_loaders["valid"] = BatchPrefetchLoaderWrapper(valid_loader)
    infer_loaders["infer"] = BatchPrefetchLoaderWrapper(test_loader)

    return train_loaders, infer_loaders
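
A hypothetical invocation of the factory above; all paths and shapes are placeholders:

    train_loaders, infer_loaders = get_loaders(
        random_state=42,
        volume_shape=[256, 256, 256],
        subvolume_shape=[64, 64, 64],
        in_csv_train="./meta/train.csv",
        in_csv_valid="./meta/valid.csv",
        in_csv_infer="./meta/infer.csv",
        batch_size=8,
        num_workers=8,
    )
    batch = next(iter(train_loaders["train"]))  # served via BatchPrefetchLoaderWrapper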