Beispiel #1
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 length: Optional[int] = None):
        """Resolve dataset paths, load the sample ids and build transforms.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; sample ids are read from the ``Id`` column
                of ``<split>.csv``.
            extension: File extension used when ``data`` has no entry in
                ``known_extensions``.
            length: When given, only a random sample of at most ``length``
                ids is kept.
        """
        self.datapath = wsl_data_dir / data
        self.data = data

        # A registered dataset always wins over the caller-supplied value.
        self.extension = (known_extensions[data]
                          if data in known_extensions.keys()
                          else extension)

        split_csv = wsl_csv_dir / data / f'{split}.csv'
        sample_ids = pd.read_csv(split_csv).Id.tolist()
        if length is not None:
            # Never request more ids than are available.
            n_keep = min(len(sample_ids), length)
            sample_ids = random.sample(sample_ids, n_keep)
        self.names = sample_ids

        self.new_size = (224, 224)
        steps = [
            Resize(self.new_size),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor(),
        ]
        self.image_transforms = Compose(steps)
Beispiel #2
0
    def __init__(self, data: str, split: str, extension: str,
                 classes: int, col_name: str,
                 regression: bool, debug: bool = False):
        """Load ids and labels for one split and prepare image transforms.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ids are read from the ``Id`` column of
                ``<split>.csv``.
            extension: File extension used when ``data`` is neither
                ``'rsna'`` nor ``'chexpert'``.
            classes: Number of label classes. Must be 1 when ``regression``
                is true.
            col_name: Column of ``info.csv`` holding the labels; its cells
                are parsed with ``literal_eval``.
            regression: If true, labels are rescaled as
                ``(x - min) / max`` and rounded to two decimals.
            debug: If true, only the first 100 samples are kept.

        Raises:
            SystemExit: If ``regression`` is requested with ``classes != 1``.
        """
        import warnings

        if regression and classes != 1:
            print('Support for multi-class regression is not available.')
            sys.exit(1)

        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes

        known_extensions = {'rsna': 'dcm', 'chexpert': 'jpg'}
        # A known dataset always uses its registered extension.
        self.extension = known_extensions.get(data, extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={col_name: literal_eval})
        # self.df keeps the full, unfiltered info table.
        self.df = df
        df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
        Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(Ids)]

        self.names = df.Id.to_list()
        self.labels = df[col_name].tolist()

        if debug:
            self.names = self.names[0:100]
            self.labels = self.labels[0:100]

        self.image_transforms = Compose([
            Resize((224, 224)),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()])

        if regression:
            # Statistics come from the whole split, even in debug mode.
            self.lmax = df[col_name].max()
            self.lmin = df[col_name].min()
            # NOTE(review): this divides by lmax rather than (lmax - lmin),
            # so the result only spans [0, 1] when lmin == 0. Left unchanged
            # because code elsewhere may invert it via lmin/lmax - confirm.
            self.labels = [[round((x - self.lmin) / self.lmax, 2)] for x in self.labels]
        else:
            if classes == 1:
                self.labels = [[x] for x in self.labels]
            else:
                # Multi-class labels are dicts; the first one supplies the
                # class names.
                self.class_names = self.labels[0].keys()
                self.labels = [list(x.values()) for x in self.labels]

            # Per-column ratio of (count - sum) to sum. A column whose labels
            # sum to zero used to raise ZeroDivisionError; it now gets a
            # neutral weight of 1.0 and a warning instead.
            self.pos_weight = []
            for idx, col in enumerate(zip(*self.labels)):
                positives = sum(col)
                if positives == 0:
                    warnings.warn(
                        f'Labels in column {idx} sum to zero; '
                        'using a neutral pos_weight of 1.0.')
                    self.pos_weight.append(1.0)
                else:
                    self.pos_weight.append(
                        round((len(col) - positives) / positives, 2))
Beispiel #3
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 classes: int,
                 column: str,
                 debug: bool = False):
        """Load the ids of one split and prepare image transforms.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ids are read from the ``Id`` column of
                ``<split>.csv``.
            extension: File extension used when ``data`` has no entry in
                ``known_extensions``.
            classes: Number of label classes (stored as-is).
            column: Label column of ``info.csv``; parsed with
                ``literal_eval``, as is the ``box`` column.
            debug: If true, only the first 100 unique ids are kept.
        """
        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes
        self.column = column

        # A registered dataset always uses its known extension.
        self.extension = (known_extensions[data]
                          if data in known_extensions
                          else extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={
                             column: literal_eval,
                             'box': literal_eval
                         })
        # self.df keeps the full, unfiltered info table.
        self.df = df
        Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(Ids)]
        # Largest number of rows sharing one Id (presumably one row per box).
        self.max_boxes = df['Id'].value_counts().max()
        # De-duplicate while keeping first-occurrence order. A plain set()
        # yields an arbitrary order that can differ between interpreter
        # runs, which made the debug subset below non-reproducible.
        self.names = list(dict.fromkeys(df.Id.to_list()))
        if debug:
            self.names = self.names[0:100]

        # No Resize step is applied here (a `Resize((224, 224))` entry was
        # commented out in the original); presumably so box coordinates stay
        # valid - TODO confirm.
        self.image_transforms = Compose([
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()
        ])
Beispiel #4
0
    def __init__(self,
                 data: str,
                 split: str,
                 extension: str,
                 classes: int,
                 column: str,
                 variable_type: str,
                 augmentation: bool = False,
                 debug: bool = False):
        """Load ids and labels for one split; set up transforms/augmentation.

        Args:
            data: Dataset name; used as a sub-directory of both
                ``wsl_data_dir`` and ``wsl_csv_dir``.
            split: Split name; ids are read from the ``Id`` column of
                ``<split>.csv``.
            extension: File extension used when ``data`` has no entry in
                ``known_extensions``.
            classes: Number of label classes.
            column: Label column of ``info.csv``; parsed with
                ``literal_eval``, as is the ``box`` column.
            variable_type: Kind of label variable. Any value other than
                ``'categorical'`` triggers label reshaping and the
                ``pos_weight`` computation.
            augmentation: If true, ``self.augmentation`` is an ``Affine``
                transform; otherwise it is ``None``.
            debug: If true, only the first 100 samples are kept.
        """
        import warnings

        if classes != 1:
            print('Note: Ensure all labels are of a single type.')

        self.datapath = wsl_data_dir / data
        self.data = data
        self.classes = classes

        # A registered dataset always uses its known extension.
        self.extension = (known_extensions[data]
                          if data in known_extensions
                          else extension)

        df = pd.read_csv(wsl_csv_dir / data / 'info.csv',
                         converters={
                             column: literal_eval,
                             'box': literal_eval
                         })
        # self.df keeps the full, unfiltered info table.
        self.df = df
        df = df.drop_duplicates(subset='Id', keep='first', ignore_index=True)
        Ids = pd.read_csv(wsl_csv_dir / data / f'{split}.csv').Id.tolist()
        df = df[df.Id.isin(Ids)]

        self.names = df.Id.to_list()
        self.labels = df[column].tolist()
        self.variable_type = variable_type

        if debug:
            self.names = self.names[0:100]
            self.labels = self.labels[0:100]

        self.new_size = (224, 224)
        self.image_transforms = Compose([
            Resize(self.new_size),
            RepeatChannel(repeats=3),
            CastToType(dtype=np.float32),
            ToTensor()
        ])

        # self.augmentation ends up as either a transform or None, never the
        # raw boolean flag.
        if augmentation:
            self.augmentation = Affine(rotate_params=np.pi / 6,
                                       scale_params=(1.2, 1.2),
                                       translate_params=(50, 50),
                                       padding_mode='zeros')
        else:
            self.augmentation = None

        if self.variable_type != 'categorical':
            if classes == 1:
                self.labels = [[x] for x in self.labels]
            else:
                # Multi-class labels are dicts; the first one supplies the
                # class names.
                self.class_names = self.labels[0].keys()
                print('\nClass List: ', self.class_names)
                self.labels = [list(x.values()) for x in self.labels]

            # only matters for balanced case for binary variable type
            # A column whose labels sum to zero used to raise
            # ZeroDivisionError; it now gets a neutral weight of 1.0 and a
            # warning instead.
            self.pos_weight = []
            for idx, col in enumerate(zip(*self.labels)):
                positives = sum(col)
                if positives == 0:
                    warnings.warn(
                        f'Labels in column {idx} sum to zero; '
                        'using a neutral pos_weight of 1.0.')
                    self.pos_weight.append(1.0)
                else:
                    self.pos_weight.append(
                        round((len(col) - positives) / positives, 2))
Beispiel #5
0
 def test_shape(self, input_param, input_data, expected_shape):
     """Check the output shape of RepeatChannel built from ``input_param``."""
     transform = RepeatChannel(**input_param)
     output = transform(input_data)
     self.assertEqual(output.shape, expected_shape)