Esempio n. 1
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        """Build the "train" / "valid" / "infer" datasets for a stage.

        Each sample is read through a ``ReaderCompose`` made of an
        ``ImageReader`` ("images" -> "image"), a ``MaskReader``
        ("masks" -> "mask") and a ``ScalarReader`` ("name" -> "name").

        Args:
            stage: stage name, passed to ``self.get_transforms``.
            datapath: passed as ``datapath`` to the image and mask readers.
            in_csv: forwarded to ``read_csv_data``.
            in_csv_train: forwarded to ``read_csv_data``.
            in_csv_valid: forwarded to ``read_csv_data``.
            in_csv_infer: forwarded to ``read_csv_data``.
            train_folds: forwarded to ``read_csv_data``.
            valid_folds: forwarded to ``read_csv_data``.
            tag2class: path to a JSON file. When given, the file is parsed
                and the resulting object is forwarded to ``read_csv_data``;
                otherwise ``None`` is forwarded.
            class_column: forwarded to ``read_csv_data``.
            tag_column: forwarded to ``read_csv_data``.
            folds_seed: forwarded to ``read_csv_data`` as ``seed``.
            n_folds: forwarded to ``read_csv_data``.
            image_size: passed to ``self.get_transforms``.

        Returns:
            ``collections.OrderedDict`` mapping a mode name to a
            ``ListDataset``. Modes whose data source is empty are omitted.
        """
        if tag2class is not None:
            # Context manager guarantees the JSON file handle is closed.
            with open(tag2class) as tag2class_file:
                tag2class = json.load(tag2class_file)

        # The first element of the returned tuple is not needed here.
        _, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            MaskReader(
                input_key="masks", output_key="mask", datapath=datapath),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        datasets = collections.OrderedDict()
        sources = (
            ("train", df_train),
            ("valid", df_valid),
            ("infer", df_infer),
        )
        for mode, source in sources:
            # Explicit length check: ``source`` may not support truthiness.
            if len(source) == 0:
                continue
            datasets[mode] = ListDataset(
                list_data=source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(
                    stage=stage, mode=mode, image_size=image_size),
            )

        return datasets
Esempio n. 2
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        one_hot_classes: int = None,
        num_frames: int = None,
        num_segments: int = None,
        time_window: int = None,
        uniform_time_sample: bool = False,
    ):
        """Build the "train" / "valid" / "infer" datasets for a stage.

        Every sample gets a "targets" scalar read from the "class" key and a
        "features" entry produced by ``VideoImageReader`` from the "filepath"
        key. The validation reader differs from the train/infer reader only
        in that its ``VideoImageReader`` is created with ``with_offset=True``.

        Args:
            stage: stage name, passed to ``self.get_transforms``.
            datapath: passed as ``datapath`` to ``VideoImageReader``.
            in_csv: forwarded to ``read_csv_data``.
            in_csv_train: forwarded to ``read_csv_data``.
            in_csv_valid: forwarded to ``read_csv_data``.
            in_csv_infer: forwarded to ``read_csv_data``.
            train_folds: forwarded to ``read_csv_data``.
            valid_folds: forwarded to ``read_csv_data``.
            tag2class: path to a JSON file. When given, the file is parsed
                and the resulting object is forwarded to ``read_csv_data``;
                otherwise ``None`` is forwarded.
            class_column: forwarded to ``read_csv_data``.
            tag_column: forwarded to ``read_csv_data``.
            folds_seed: forwarded to ``read_csv_data`` as ``seed``.
            n_folds: forwarded to ``read_csv_data``.
            one_hot_classes: when truthy, an extra ``ScalarReader`` emitting
                "targets_one_hot" is added and this value is forwarded to it
                (presumably the number of classes -- confirm against
                ``ScalarReader``).
            num_frames: forwarded to ``VideoImageReader``.
            num_segments: forwarded to ``VideoImageReader``.
            time_window: forwarded to ``VideoImageReader``.
            uniform_time_sample: forwarded to ``VideoImageReader``.

        Returns:
            ``collections.OrderedDict`` mapping a mode name to a dict with a
            "dataset" entry (a ``ListDataset``) and, for "train" only, a
            "sampler" entry (a ``BalanceClassSampler`` in "upsampling" mode).
            Modes whose data source is empty are omitted.
        """
        if tag2class is not None:
            # Context manager guarantees the JSON file handle is closed.
            with open(tag2class) as tag2class_file:
                tag2class = json.load(tag2class_file)

        # The first element of the returned tuple is not needed here.
        _, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        df_valid = preprocess_valid_data(df_valid)

        # Target readers are shared by the train/infer and validation chains.
        target_readers = [
            ScalarReader(
                input_key="class",
                output_key="targets",
                default_value=-1,
                dtype=np.int64,
            )
        ]
        if one_hot_classes:
            target_readers.append(
                ScalarReader(
                    input_key="class",
                    output_key="targets_one_hot",
                    default_value=-1,
                    dtype=np.int64,
                    one_hot_classes=one_hot_classes,
                ))

        # Both video readers take the same settings; only ``with_offset``
        # distinguishes the validation one.
        video_reader_kwargs = dict(
            input_key="filepath",
            output_key="features",
            datapath=datapath,
            num_frames=num_frames,
            num_segments=num_segments,
            time_window=time_window,
            uniform_time_sample=uniform_time_sample,
        )
        video_reader = VideoImageReader(**video_reader_kwargs)
        video_reader_val = VideoImageReader(
            with_offset=True, **video_reader_kwargs)

        open_fn = ReaderCompose(readers=target_readers + [video_reader])
        open_fn_val = ReaderCompose(
            readers=target_readers + [video_reader_val])

        datasets = collections.OrderedDict()
        sources = (
            ("train", df_train),
            ("valid", df_valid),
            ("infer", df_infer),
        )
        for mode, source in sources:
            # Explicit length check: ``source`` may not support truthiness.
            if len(source) == 0:
                continue

            dataset_dict = {
                "dataset": ListDataset(
                    source,
                    open_fn=open_fn_val if mode == "valid" else open_fn,
                    dict_transform=self.get_transforms(
                        stage=stage, mode=mode),
                )
            }
            if mode == "train":
                # Balance classes by sampling over the training labels.
                labels = [row["class"] for row in source]
                dataset_dict["sampler"] = BalanceClassSampler(
                    labels, mode="upsampling")

            datasets[mode] = dataset_dict

        return datasets
Esempio n. 3
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        """Build the "train" / "valid" / "infer" datasets for a stage.

        Each sample is read through a ``ReaderCompose`` made of an
        ``ImageReader`` ("images" -> "image"), a ``LambdaReader`` that loads
        the mask with OpenCV ("masks" -> "mask") and a ``ScalarReader``
        ("name" -> "name").

        Args:
            stage: stage name, passed to ``self.get_transforms``.
            datapath: passed as ``datapath`` to the image and mask readers.
            in_csv: forwarded to ``read_csv_data``.
            in_csv_train: forwarded to ``read_csv_data``.
            in_csv_valid: forwarded to ``read_csv_data``.
            in_csv_infer: forwarded to ``read_csv_data``.
            train_folds: forwarded to ``read_csv_data``.
            valid_folds: forwarded to ``read_csv_data``.
            tag2class: path to a JSON file. When given, the file is parsed
                and the resulting object is forwarded to ``read_csv_data``;
                otherwise ``None`` is forwarded.
            class_column: forwarded to ``read_csv_data``.
            tag_column: forwarded to ``read_csv_data``.
            folds_seed: forwarded to ``read_csv_data`` as ``seed``.
            n_folds: forwarded to ``read_csv_data``.
            image_size: passed to ``self.get_transforms``.

        Returns:
            ``collections.OrderedDict`` mapping a mode name to a
            ``ListDataset``. Modes whose data source is empty are omitted.
        """
        import cv2
        import os

        if tag2class is not None:
            # Context manager guarantees the JSON file handle is closed.
            with open(tag2class) as tag2class_file:
                tag2class = json.load(tag2class_file)

        # The first element of the returned tuple is not needed here.
        _, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        # Parameter names are kept as-is: ``datapath`` is presumably passed
        # by keyword from ``LambdaReader`` -- confirm before renaming.
        def encode_fn_lambda(fname, datapath):
            """Load a mask file as a single-channel array.

            The image is read with OpenCV, converted from BGR to grayscale,
            integer-divided by 255 and given a trailing channel axis. For
            8-bit input this maps only pixels equal to 255 to 1 and all
            others to 0.
            """
            mask_bgr = cv2.imread(os.path.join(datapath, fname))
            mask_gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
            return (mask_gray // 255)[:, :, None]

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            LambdaReader(
                input_key="masks",
                output_key="mask",
                datapath=datapath,
                encode_fn=encode_fn_lambda,
            ),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        datasets = collections.OrderedDict()
        sources = (
            ("train", df_train),
            ("valid", df_valid),
            ("infer", df_infer),
        )
        for mode, source in sources:
            # Explicit length check: ``source`` may not support truthiness.
            if len(source) == 0:
                continue
            datasets[mode] = ListDataset(
                list_data=source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(
                    stage=stage, mode=mode, image_size=image_size),
            )

        return datasets
Esempio n. 4
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = "class",
        input_column: str = "filepath",
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        one_hot_classes: int = None,
        upsampling: bool = False,
        image_size: int = 224,
        crop_from_gray: bool = False,
        circle_crop: bool = False,
        normalize: bool = True,
        ben_preprocess: int = 10,
        hor_flip: float = 0.5,
        ver_flip: float = 0.2,
        rotate: int = 120,
        random_scale: float = 0.3,
        random_scale_p: float = 0.75,
        brightness: float = 0.2,
        contrast: float = 0.2,
        color_p: float = 0.5,
    ):
        """Build the "train" / "valid" / "infer" datasets for a stage.

        Each non-empty data source is wrapped in a pandas ``DataFrame`` and
        handed to ``RetinopathyDatasetTrain`` together with the transforms
        returned by ``self.get_transforms`` for that mode.

        Args:
            stage: stage name, passed to ``self.get_transforms``.
            datapath: image location passed to ``RetinopathyDatasetTrain``
                for the "train" and "infer" modes.
            in_csv: forwarded to ``read_csv_data``.
            in_csv_train: forwarded to ``read_csv_data``.
            in_csv_valid: forwarded to ``read_csv_data``.
            in_csv_infer: forwarded to ``read_csv_data``.
            train_folds: forwarded to ``read_csv_data``.
            valid_folds: forwarded to ``read_csv_data``.
            tag2class: path to a JSON file. When given, the file is parsed
                and the resulting object is forwarded to ``read_csv_data``;
                otherwise ``None`` is forwarded.
            class_column: forwarded to ``read_csv_data``; also the key used
                to collect labels for the balancing sampler.
            input_column: not used by this method.
            tag_column: forwarded to ``read_csv_data``.
            folds_seed: forwarded to ``read_csv_data`` as ``seed``.
            n_folds: forwarded to ``read_csv_data``.
            one_hot_classes: forwarded to ``self.get_transforms``.
            upsampling: when exactly ``True``, the "train" entry becomes a
                dict holding the dataset and a ``BalanceClassSampler`` in
                "upsampling" mode.
            image_size: forwarded to ``self.get_transforms``.
            crop_from_gray: forwarded to ``self.get_transforms``.
            circle_crop: forwarded to ``self.get_transforms``.
            normalize: forwarded to ``self.get_transforms``.
            ben_preprocess: forwarded to ``self.get_transforms``.
            hor_flip: forwarded to ``self.get_transforms``.
            ver_flip: forwarded to ``self.get_transforms``.
            rotate: forwarded to ``self.get_transforms``.
            random_scale: forwarded to ``self.get_transforms``.
            random_scale_p: forwarded to ``self.get_transforms``.
            brightness: forwarded to ``self.get_transforms``.
            contrast: forwarded to ``self.get_transforms``.
            color_p: forwarded to ``self.get_transforms``.

        Returns:
            ``collections.OrderedDict`` mapping a mode name to either a
            ``RetinopathyDatasetTrain`` or, for "train" with upsampling, a
            dict with "dataset" and "sampler" entries. Modes whose data
            source is empty are omitted.
        """
        if tag2class is not None:
            # Context manager guarantees the JSON file handle is closed.
            with open(tag2class) as tag2class_file:
                tag2class = json.load(tag2class_file)

        # The first element of the returned tuple is not needed here.
        _, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        datasets = collections.OrderedDict()
        sources = (
            ("train", df_train),
            ("valid", df_valid),
            ("infer", df_infer),
        )
        for mode, source in sources:
            # Explicit length check: ``source`` may not support truthiness.
            if len(source) == 0:
                continue

            transforms = self.get_transforms(
                stage=stage,
                mode=mode,
                image_size=image_size,
                one_hot_classes=one_hot_classes,
                crop_from_gray=crop_from_gray,
                circle_crop=circle_crop,
                normalize=normalize,
                ben_preprocess=ben_preprocess,
                hor_flip=hor_flip,
                ver_flip=ver_flip,
                rotate=rotate,
                random_scale=random_scale,
                random_scale_p=random_scale_p,
                brightness=brightness,
                contrast=contrast,
                color_p=color_p,
            )

            # NOTE(review): validation ignores ``datapath`` and always reads
            # from a hard-coded folder -- confirm this is intentional.
            if mode == "valid":
                image_root = "./data/train_images"
            else:
                image_root = datapath
            dataset = RetinopathyDatasetTrain(
                pd.DataFrame(source), image_root, transforms)

            if upsampling is True and mode == "train":
                # Balance classes by sampling over the training labels.
                labels = [row[class_column] for row in source]
                sampler = BalanceClassSampler(labels, mode="upsampling")
                dataset = {"dataset": dataset, "sampler": sampler}

            datasets[mode] = dataset

        return datasets
Esempio n. 5
0
    def get_datasets(self,
                     stage: str,
                     datapath: str = None,
                     in_csv: str = None,
                     in_csv_train: str = None,
                     in_csv_valid: str = None,
                     in_csv_infer: str = None,
                     train_folds: str = None,
                     valid_folds: str = None,
                     tag2class: str = None,
                     class_column: str = None,
                     tag_column: str = None,
                     folds_seed: int = 42,
                     n_folds: int = 5,
                     one_hot_classes: int = None,
                     image_size: int = 224,
                     balance_strategy: str = "upsample"):
        """Build the "train" / "valid" / "infer" datasets for a stage.

        Each sample is read through a ``ReaderCompose`` made of an
        ``ImageReader`` ("filepath" -> "image") and a ``ScalarReader``
        ("class" -> "targets"), plus an optional one-hot target reader.

        Args:
            stage: stage name, passed to ``self.get_transforms``.
            datapath: passed as ``datapath`` to ``ImageReader``.
            in_csv: forwarded to ``read_csv_data``.
            in_csv_train: forwarded to ``read_csv_data``.
            in_csv_valid: forwarded to ``read_csv_data``.
            in_csv_infer: forwarded to ``read_csv_data``.
            train_folds: forwarded to ``read_csv_data``.
            valid_folds: forwarded to ``read_csv_data``.
            tag2class: path to a JSON file. When given, the file is parsed
                and the resulting object is forwarded to ``read_csv_data``;
                otherwise ``None`` is forwarded.
            class_column: forwarded to ``read_csv_data``.
            tag_column: forwarded to ``read_csv_data``.
            folds_seed: forwarded to ``read_csv_data`` as ``seed``.
            n_folds: forwarded to ``read_csv_data``.
            one_hot_classes: when truthy, an extra ``ScalarReader`` emitting
                "targets_one_hot" is added and this value is forwarded to
                it; it is also forwarded to ``self.get_transforms``.
            image_size: forwarded to ``self.get_transforms``.
            balance_strategy: passed as ``mode`` to ``BalanceClassSampler``
                for the "train" dataset.
                NOTE(review): confirm that the default "upsample" is a mode
                ``BalanceClassSampler`` accepts; Catalyst appears to document
                "upsampling" / "downsampling".

        Returns:
            ``collections.OrderedDict`` mapping a mode name to a
            ``ListDataset`` or, for "train", a dict with "dataset" and
            "sampler" entries. Modes whose data source is empty are omitted.
        """
        if tag2class is not None:
            # Context manager guarantees the JSON file handle is closed.
            with open(tag2class) as tag2class_file:
                tag2class = json.load(tag2class_file)

        # The first element of the returned tuple is not needed here.
        _, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds)

        readers = [
            ImageReader(input_key="filepath",
                        output_key="image",
                        datapath=datapath),
            ScalarReader(input_key="class",
                         output_key="targets",
                         default_value=-1,
                         dtype=np.int64),
        ]
        if one_hot_classes:
            readers.append(
                ScalarReader(input_key="class",
                             output_key="targets_one_hot",
                             default_value=-1,
                             dtype=np.int64,
                             one_hot_classes=one_hot_classes))
        open_fn = ReaderCompose(readers=readers)

        datasets = collections.OrderedDict()
        sources = (("train", df_train),
                   ("valid", df_valid),
                   ("infer", df_infer))
        for mode, source in sources:
            # Explicit length check: ``source`` may not support truthiness.
            if len(source) == 0:
                continue

            dataset = ListDataset(
                source,
                open_fn=open_fn,
                dict_transform=self.get_transforms(
                    stage=stage,
                    mode=mode,
                    image_size=image_size,
                    one_hot_classes=one_hot_classes),
            )
            if mode == "train":
                # Balance classes by sampling over the training labels.
                labels = [row["class"] for row in source]
                sampler = BalanceClassSampler(labels,
                                              mode=balance_strategy)
                dataset = {"dataset": dataset, "sampler": sampler}

            datasets[mode] = dataset

        return datasets