def __init__(self, root: str, normal_class: int, preproc: str, nominal_label: int,
             supervise_mode: str, noise_mode: str, oe_limit: int, online_supervision: bool,
             logger: Logger = None):
    """
    AD dataset for Cifar-10.

    :param root: root directory where data is found or is to be downloaded to
    :param normal_class: the class considered nominal
    :param preproc: the kind of preprocessing pipeline; one of 'lcn', '' / None / 'default' / 'none',
        'aug1', 'aug1_blackcenter', 'aug1_blackcenter_inverted'
    :param nominal_label: the label that marks nominal samples in training.
        The scores in the heatmaps always rate label 1, thus usually the nominal label is 0,
        s.t. the scores are anomaly scores.
    :param supervise_mode: the type of generated artificial anomalies.
        See :meth:`fcdd.datasets.bases.TorchvisionDataset._generate_artificial_anomalies_train_set`.
    :param noise_mode: the type of noise used, see :mod:`fcdd.datasets.noise_mode`.
    :param oe_limit: limits the number of different anomalies in case of Outlier Exposure (defined in noise_mode)
    :param online_supervision: whether to sample anomalies online in each epoch,
        or offline before training (same for all epochs in this case).
    :param logger: logger
    :raises ValueError: if ``preproc`` names an unknown pipeline, or ``normal_class`` is not in 0..9
        (raised by ``list.remove``).
    """
    super().__init__(root, logger=logger)

    self.n_classes = 2  # 0: normal, 1: outlier
    self.shape = (3, 32, 32)
    self.raw_shape = (3, 32, 32)
    self.normal_classes = tuple([normal_class])
    self.outlier_classes = list(range(0, 10))
    self.outlier_classes.remove(normal_class)
    assert nominal_label in [0, 1]
    self.nominal_label = nominal_label
    self.anomalous_label = 1 if self.nominal_label == 0 else 0

    if self.nominal_label != 0:
        print('Swapping labels, i.e. anomalies are 0 and nominals are 1.')

    # Pre-computed min and max values (after applying LCN) from train data per class
    min_max_l1 = [
        (-28.94083453598571, 13.802961825439636),
        (-6.681770233365245, 9.158067708230273),
        (-34.924463588638204, 14.419298165027628),
        (-10.599172931391799, 11.093187820377565),
        (-11.945022995801637, 10.628045447867583),
        (-9.691969487694928, 8.948326776180823),
        (-9.174940012342555, 13.847014686472365),
        (-6.876682005899029, 12.282371383343161),
        (-15.603507135507172, 15.2464923804279),
        (-6.132882973622672, 8.046098172351265),
    ]

    # mean and std of original images per class
    mean = [
        [0.5256516933441162, 0.5603281855583191, 0.5888723731040955],
        [0.4711322784423828, 0.45446228981018066, 0.4471212327480316],
        [0.48923906683921814, 0.49146366119384766, 0.423904687166214],
        [0.4954785108566284, 0.45636114478111267, 0.4154069721698761],
        [0.47155335545539856, 0.46515223383903503, 0.37797248363494873],
        [0.49992093443870544, 0.4646056592464447, 0.4164286255836487],
        [0.47001829743385315, 0.43829214572906494, 0.34500396251678467],
        [0.5019531846046448, 0.47983652353286743, 0.4167139232158661],
        [0.4902143180370331, 0.5253947973251343, 0.5546804070472717],
        [0.4986417591571808, 0.4852965474128723, 0.4780091941356659],
    ]
    std = [
        [0.2502202093601227, 0.24083486199378967, 0.2659735083580017],
        [0.26806357502937317, 0.2658274173736572, 0.2749459445476532],
        [0.22705480456352234, 0.2209445983171463, 0.24337927997112274],
        [0.2568431496620178, 0.25227081775665283, 0.25799375772476196],
        [0.21732737123966217, 0.20652702450752258, 0.21182335913181305],
        [0.2504253387451172, 0.24374878406524658, 0.2489463835954666],
        [0.22888341546058655, 0.21856172382831573, 0.2204199582338333],
        [0.2430490106344223, 0.243973046541214, 0.25171563029289246],
        [0.24962472915649414, 0.24068884551525116, 0.25149762630462646],
        [0.2680525481700897, 0.26910799741744995, 0.2810165584087372],
    ]

    def plain_pipeline():
        # Un-augmented pipeline: tensor conversion plus per-class standardization.
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean[normal_class], std[normal_class])
        ])

    def augmented_pipeline(*extra_steps):
        # 'aug1' training pipeline; extra_steps (e.g. BlackCenter) are applied to the
        # noisy tensor right before the final standardization.
        return transforms.Compose([
            transforms.ColorJitter(brightness=0.01, contrast=0.01, saturation=0.01, hue=0.01),
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x + 0.001 * torch.randn_like(x)),
            *extra_steps,
            transforms.Normalize(mean[normal_class], std[normal_class])
        ])

    # different types of preprocessing pipelines, 'lcn' is for using LCN, 'aug{X}' for augmentations
    # also contains options for the black center experiments
    all_transform = []
    if preproc == 'lcn':
        lcn_min, lcn_max = min_max_l1[normal_class]
        # Shift by the pre-computed minimum and scale by the range, for all three channels.
        test_transform = transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: local_contrast_normalization(x, scale='l1')),
            transforms.Normalize([lcn_min] * 3, [lcn_max - lcn_min] * 3)
        ])
    elif preproc in ['', None, 'default', 'none']:
        test_transform = transform = plain_pipeline()
    elif preproc in ['aug1']:
        test_transform = plain_pipeline()
        transform = augmented_pipeline()
    elif preproc in ['aug1_blackcenter']:
        test_transform = plain_pipeline()
        transform = augmented_pipeline(BlackCenter(0.6))
    elif preproc in ['aug1_blackcenter_inverted']:
        test_transform = plain_pipeline()
        transform = augmented_pipeline(BlackCenter(0.6, inverse=True))
    else:
        raise ValueError('Preprocessing pipeline {} is not known.'.format(preproc))

    # Maps the original class index to the binary nominal/anomalous label.
    target_transform = transforms.Lambda(
        lambda x: self.anomalous_label if x in self.outlier_classes else self.nominal_label
    )
    if online_supervision:
        # Spelled `OnlineSupervisor`, consistent with the other dataset constructors
        # (was `OnlineSuperviser`).
        all_transform = MultiCompose([
            OnlineSupervisor(self, supervise_mode, noise_mode, oe_limit),
            *all_transform
        ])
    else:
        all_transform = MultiCompose(all_transform)

    train_set = MYCIFAR10(root=self.root, train=True, download=True, normal_classes=self.normal_classes,
                          transform=transform, target_transform=target_transform, all_transform=all_transform)
    train_set.targets = torch.from_numpy(np.asarray(train_set.targets))
    # Moves the last axis to position 1 (presumably n x h x w x c -> n x c x h x w; confirm against MYCIFAR10).
    train_set.data = torch.from_numpy(train_set.data).transpose(1, 3).transpose(2, 3)

    # With online supervision the offline generation is run in 'unsupervised' mode.
    self._generate_artificial_anomalies_train_set(
        supervise_mode if not online_supervision else 'unsupervised', noise_mode,
        oe_limit, train_set, normal_class
    )

    self._test_set = MYCIFAR10(root=self.root, train=False, download=True, normal_classes=self.normal_classes,
                               transform=test_transform, target_transform=target_transform)
def __init__(self, root: str, normal_class: int, preproc: str, nominal_label: int,
             supervise_mode: str, noise_mode: str, oe_limit: int, online_supervision: bool,
             logger: Logger = None):
    """
    AD dataset for Fashion-MNIST.

    Builds the preprocessing pipelines selected by ``preproc``, creates the training set
    (optionally with an online supervisor that samples anomalies on the fly), lets the base class
    generate artificial anomalies, and creates the test set.

    :param root: root directory where data is found or is to be downloaded to
    :param normal_class: the class considered nominal
    :param preproc: the kind of preprocessing pipeline; one of 'lcn', '' / None / 'default' / 'none',
        'aug1', 'lcnaug1'
    :param nominal_label: the label that marks nominal samples in training.
        The scores in the heatmaps always rate label 1, thus usually the nominal label is 0,
        s.t. the scores are anomaly scores.
    :param supervise_mode: the type of generated artificial anomalies.
        See :meth:`fcdd.datasets.bases.TorchvisionDataset._generate_artificial_anomalies_train_set`.
    :param noise_mode: the type of noise used, see :mod:`fcdd.datasets.noise_mode`.
    :param oe_limit: limits the number of different anomalies in case of Outlier Exposure (defined in noise_mode)
    :param online_supervision: whether to sample anomalies online in each epoch,
        or offline before training (same for all epochs in this case)
    :param logger: logger
    """
    super().__init__(root, logger=logger)

    self.n_classes = 2  # 0: normal, 1: outlier
    self.shape = (1, 28, 28)
    self.raw_shape = (28, 28)
    self.normal_classes = tuple([normal_class])
    self.outlier_classes = list(range(0, 10))
    self.outlier_classes.remove(normal_class)
    assert nominal_label in [0, 1]
    self.nominal_label = nominal_label
    self.anomalous_label = 1 if self.nominal_label == 0 else 0

    # Pre-computed (min, max) values after applying LCN, from train data per class
    lcn_range = [
        (-2.681239128112793, 24.85430908203125),
        (-2.5778584480285645, 11.169795989990234),
        (-2.808171510696411, 19.133548736572266),
        (-1.9533653259277344, 18.65673065185547),
        (-2.6103856563568115, 19.166685104370117),
        (-1.2358522415161133, 28.463092803955078),
        (-3.251605987548828, 24.196823120117188),
        (-1.0814440250396729, 16.878812789916992),
        (-3.6560964584350586, 11.3502836227417),
        (-1.3859291076660156, 11.426650047302246),
    ]

    # mean and std of original images per class
    class_mean = [
        [0.3256056010723114],
        [0.22290456295013428],
        [0.376699835062027],
        [0.25889596343040466],
        [0.3853232264518738],
        [0.1367349475622177],
        [0.3317836821079254],
        [0.16769391298294067],
        [0.35355499386787415],
        [0.30119451880455017],
    ]
    class_std = [
        [0.35073918104171753],
        [0.34353047609329224],
        [0.3586803078651428],
        [0.3542196452617645],
        [0.37631189823150635],
        [0.26310813426971436],
        [0.3392786681652069],
        [0.29478660225868225],
        [0.3652712106704712],
        [0.37053292989730835],
    ]

    def lcn_steps():
        # ToTensor -> LCN -> shift by the class minimum and scale by the class range.
        low, high = lcn_range[normal_class]
        return [
            transforms.ToTensor(),
            transforms.Lambda(lambda x: local_contrast_normalization(x, scale='l1')),
            transforms.Normalize([low], [high - low]),
        ]

    def standardize_steps():
        # ToTensor -> per-class mean/std standardization.
        return [
            transforms.ToTensor(),
            transforms.Normalize(class_mean[normal_class], class_std[normal_class]),
        ]

    # different types of preprocessing pipelines, 'lcn' is for using LCN, 'aug{X}' for augmentations
    if preproc == 'lcn':
        train_tf = transforms.Compose(lcn_steps())
        test_tf = train_tf  # train and test share the very same pipeline object
    elif preproc in ['', None, 'default', 'none']:
        train_tf = transforms.Compose(standardize_steps())
        test_tf = train_tf
    elif preproc in ['aug1']:
        test_tf = transforms.Compose(standardize_steps())
        train_tf = transforms.Compose([
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
            transforms.RandomCrop(28, padding=3),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x + 0.01 * torch.randn_like(x)),
            transforms.Normalize(class_mean[normal_class], class_std[normal_class]),
        ])
    elif preproc in ['lcnaug1']:
        test_tf = transforms.Compose(lcn_steps())
        train_tf = transforms.Compose([
            transforms.RandomCrop(28, padding=2),
            transforms.RandomHorizontalFlip(),
            *lcn_steps(),
        ])
    else:
        raise ValueError('Preprocessing pipeline {} is not known.'.format(preproc))

    # Maps the original class index to the binary nominal/anomalous label.
    target_transform = transforms.Lambda(
        lambda x: self.anomalous_label if x in self.outlier_classes else self.nominal_label
    )

    all_transform = None
    if online_supervision:
        with_channel_dim = noise_mode not in ['emnist']
        if with_channel_dim:
            # Temporarily advertise a channel dimension while the supervisor is constructed
            # (presumably OnlineSupervisor reads raw_shape at construction -- TODO confirm).
            self.raw_shape = (1, 28, 28)
        supervision_steps = [OnlineSupervisor(self, supervise_mode, noise_mode, oe_limit)]
        if with_channel_dim:
            # Drop singleton dimensions again from tensor outputs.
            supervision_steps.append(
                transforms.Lambda(lambda x: x.squeeze() if isinstance(x, torch.Tensor) else x)
            )
        all_transform = MultiCompose(supervision_steps)
        # NOTE(review): source indentation was lost; the reset is assumed to follow the inner if/else.
        self.raw_shape = (28, 28)

    train_set = MyFashionMNIST(root=self.root, train=True, download=True, normal_classes=self.normal_classes,
                               transform=train_tf, target_transform=target_transform,
                               all_transform=all_transform)

    offline_mode = 'unsupervised' if online_supervision else supervise_mode  # gets rid of true anomalous samples
    self._generate_artificial_anomalies_train_set(offline_mode, noise_mode, oe_limit, train_set, normal_class)

    self._test_set = MyFashionMNIST(root=self.root, train=False, download=True,
                                    normal_classes=self.normal_classes,
                                    transform=test_tf, target_transform=target_transform)
def __init__(self, root: str, normal_class: int, preproc: str, nominal_label: int,
             supervise_mode: str, noise_mode: str, oe_limit: int, online_supervision: bool,
             logger: Logger = None):
    """
    This is a general-purpose implementation for custom datasets.
    It expects the data being contained in class folders and distinguishes between

    (1) the one-vs-rest (ovr) approach where one class is considered normal
        and is tested against all other classes being anomalous
    (2) the general approach where each class folder contains a normal data folder and an anomalous data folder.

    The :attr:`ovr` determines this.

    For (1) the data folders have to follow this structure::

        root/custom/train/dog/xxx.png
        root/custom/train/dog/xxy.png
        root/custom/train/dog/xxz.png

        root/custom/train/cat/123.png
        root/custom/train/cat/nsdf3.png
        root/custom/train/cat/asd932_.png

    For (2)::

        root/custom/train/hazelnut/normal/xxx.png
        root/custom/train/hazelnut/normal/xxy.png
        root/custom/train/hazelnut/normal/xxz.png
        root/custom/train/hazelnut/anomalous/xxa.png    -- may be used during training for a semi-supervised setting

        root/custom/train/screw/normal/123.png
        root/custom/train/screw/normal/nsdf3.png
        root/custom/train/screw/anomalous/asd932_.png   -- may be used during training for a semi-supervised setting

    The same holds for the test set, where "train" has to be replaced by "test".

    To take advantage of available binary ground-truth anomaly maps, you need to place them in separate folders.
    That is, create the folders "train_maps" and/or "test_maps" and place the corresponding maps
    using the same structure and name as above. For instance::

        root/custom/train_maps/dog/xxx.png for root/custom/train/dog/xxx.png
        root/custom/test_maps/screw/normal/123.png for root/custom/test/screw/normal/123.png

    The ground-truth maps need to be binary; i.e., need to be in {0, 255}^{1 x h x w},
    where 255 marks anomalous regions.
    Missing maps are replaced by tensors filled with the corresponding label (e.g., 255 for anomalies).
    That is, a completely white or black image.
    However, for computing a pixel-wise ROC score measuring the explanation performance,
    all maps for the anomalous test samples are required.
    Otherwise, the pixel-wise ROC evaluation is skipped.

    :param root: root directory where data is found.
    :param normal_class: the class considered nominal.
    :param preproc: the kind of preprocessing pipeline; one of '' / None / 'default' / 'none', 'aug1'.
    :param nominal_label: the label that marks nominal samples in training.
        The scores in the heatmaps always rate label 1, thus usually the nominal label is 0,
        s.t. the scores are anomaly scores.
    :param supervise_mode: the type of generated artificial anomalies.
        See :meth:`fcdd.datasets.bases.TorchvisionDataset._generate_artificial_anomalies_train_set`.
    :param noise_mode: the type of noise used, see :mod:`fcdd.datasets.noise_mode`.
    :param oe_limit: limits the number of different anomalies in case of Outlier Exposure (defined in noise_mode).
    :param online_supervision: whether to sample anomalies online in each epoch,
        or offline before training (same for all epochs in this case).
    :param logger: logger.
    :raises ValueError: if ``preproc`` names an unknown pipeline.
    """
    super().__init__(
        root, normal_class, preproc, nominal_label, supervise_mode, noise_mode, oe_limit, online_supervision, logger
    )

    def resize_to_input():
        # Resize straight to the network input size; NEAREST keeps ground-truth maps binary.
        return transforms.Resize((self.shape[-2], self.shape[-1]), Image.NEAREST)

    # img_gtm transforms transform images and corresponding ground-truth maps jointly.
    # This is critically required for random geometric transformations as otherwise
    # the maps would not match the images anymore.
    if preproc in ['', None, 'default', 'none']:
        img_gtm_test_transform = img_gtm_transform = MultiCompose([
            resize_to_input(),
            transforms.ToTensor(),
        ])
        test_transform = transform = transforms.Compose([
            transforms.Normalize(self.mean, self.std)
        ])
    elif preproc in ['aug1']:
        img_gtm_transform = MultiCompose([
            transforms.RandomChoice([
                # either resize to the raw shape and take a random crop of the input size ...
                MultiCompose([
                    transforms.Resize((self.raw_shape[-2], self.raw_shape[-1]), Image.NEAREST),
                    # RandomCrop's second positional argument is `padding`, not an interpolation mode.
                    # The previous `Image.NEAREST` only worked because it equals 0; say so explicitly.
                    transforms.RandomCrop((self.shape[-2], self.shape[-1]), padding=0),
                ]),
                # ... or resize directly to the input size
                resize_to_input(),
            ]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        img_gtm_test_transform = MultiCompose(
            [resize_to_input(), transforms.ToTensor()]
        )
        test_transform = transforms.Compose([
            transforms.Normalize(self.mean, self.std)
        ])
        # Image-only augmentations, applied after the joint image/map transform.
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ColorJitter(brightness=0.01, contrast=0.01, saturation=0.01, hue=0.01),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x + 0.001 * torch.randn_like(x)),
            transforms.Normalize(self.mean, self.std)
        ])
    #  here you could define other pipelines with augmentations
    else:
        raise ValueError('Preprocessing pipeline {} is not known.'.format(preproc))

    self._train_set = ImageFolderDatasetGTM(
        self.trainpath, supervise_mode, self.raw_shape, self.ovr, self.nominal_label, self.anomalous_label,
        normal_classes=self.normal_classes, transform=transform, target_transform=self.target_transform,
        all_transform=self.all_transform, img_gtm_transform=img_gtm_transform
    )
    if supervise_mode == 'other':  # (semi)-supervised setting
        self.balance_dataset(gtm=True)
    else:
        # Keep only samples that are labelled nominal AND belong to a normal class
        # (the product of the two boolean masks acts as a logical AND).
        self._train_set = GTSubset(
            self._train_set, np.argwhere(
                (np.asarray(self._train_set.anomaly_labels) == self.nominal_label) *
                np.isin(self._train_set.targets, self.normal_classes)
            ).flatten().tolist()
        )

    self._test_set = ImageFolderDatasetGTM(
        self.testpath, supervise_mode, self.raw_shape, self.ovr, self.nominal_label, self.anomalous_label,
        normal_classes=self.normal_classes, transform=test_transform, target_transform=self.target_transform,
        img_gtm_transform=img_gtm_test_transform
    )
    if not self.ovr:
        # Outside one-vs-rest, the test set is restricted to the samples of the normal class folders.
        self._test_set = GTSubset(
            self._test_set, get_target_label_idx(self._test_set.targets, np.asarray(self.normal_classes))
        )
def __init__(self, root: str, normal_class: int, preproc: str, nominal_label: int,
             supervise_mode: str, noise_mode: str, oe_limit: int, online_supervision: bool,
             logger: Logger = None):
    """
    AD dataset for PascalVoc. Considers only the "horse" class, thus normal_class must be 0!

    :param root: root directory where data is found or is to be downloaded to
    :param normal_class: the class considered nominal; must be 0
    :param preproc: the kind of preprocessing pipeline; one of '' / None / 'default' / 'none', 'aug1'
    :param nominal_label: the label that marks nominal samples in training.
        The scores in the heatmaps always rate label 1, thus usually the nominal label is 0,
        s.t. the scores are anomaly scores.
    :param supervise_mode: the type of generated artificial anomalies.
        See :meth:`fcdd.datasets.bases.TorchvisionDataset._generate_artificial_anomalies_train_set`.
    :param noise_mode: the type of noise used, see :mod:`fcdd.datasets.noise_mode`.
    :param oe_limit: limits the number of different anomalies in case of Outlier Exposure (defined in noise_mode)
    :param online_supervision: whether to sample anomalies online in each epoch,
        or offline before training (same for all epochs in this case).
        Must be True unless supervise_mode is 'unsupervised'.
    :param logger: logger
    :raises ValueError: if ``preproc`` names an unknown pipeline.
    """
    super().__init__(root, logger=logger)
    assert normal_class == 0, 'One cls dataset with horse only!'
    if supervise_mode != 'unsupervised':
        assert online_supervision, 'PascalVoc artificial anomaly generation needs to be applied online'

    self.n_classes = 2  # 0: normal, 1: outlier
    self.shape = (3, 224, 224)
    self.raw_shape = (3, 224, 224)
    self.outlier_classes = list(range(0, 10))
    self.outlier_classes.remove(normal_class)
    assert nominal_label in [0, 1]
    self.nominal_label = nominal_label
    self.anomalous_label = 1 if self.nominal_label == 0 else 0
    # Note: derived from the nominal *label*, not from normal_class.
    self.normal_classes = tuple([self.nominal_label])

    if self.nominal_label != 0:
        print('Swapping labels, i.e. anomalies are 0 and nominals are 1.')

    # mean and std of original pictures (one value per colour channel, NOT per class)
    mean = (0.4469, 0.4227, 0.3906)
    std = (0.2691, 0.2659, 0.2789)

    all_transform = []
    if preproc in ['', None, 'default', 'none']:
        test_transform = transform = transforms.Compose([
            transforms.Resize((self.shape[-1], self.raw_shape[-1])),  # not short edge because that skips watermark
            transforms.ToTensor(),
            # Fixed: was Normalize(mean[normal_class], std[normal_class]), which indexed the
            # per-channel tuples and thus normalized every channel with the first channel's scalar stats.
            transforms.Normalize(mean, std)
        ])
    elif preproc in ['aug1']:
        test_transform = transforms.Compose([
            transforms.Resize((self.raw_shape[-1], self.raw_shape[-1])),
            transforms.CenterCrop(self.shape[-1]),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        transform = transforms.Compose([
            transforms.Resize((self.raw_shape[-1], self.raw_shape[-1])),
            transforms.ColorJitter(brightness=0.01, contrast=0.01, saturation=0.01, hue=0.01),
            transforms.RandomCrop(self.shape[-1]),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x + 0.001 * torch.randn_like(x)),
            transforms.Normalize(mean, std)
        ])
    else:
        raise ValueError('Preprocessing pipeline {} is not known.'.format(preproc))

    if online_supervision:
        all_transform = MultiCompose([
            # in case of OutlierExposure with ImageNet, exclude VOC names from classes!
            OnlineSupervisor(self, supervise_mode, noise_mode, oe_limit, exclude=MyPascalVoc.NAMES),
            *all_transform
        ])
    else:
        all_transform = MultiCompose(all_transform)

    train_set = MyPascalVoc(root=self.root, split='train', download=True, nominal_label=self.nominal_label,
                            transform=transform, all_transform=all_transform, anomlbl=self.anomalous_label)

    # With online supervision the offline generation is run in 'unsupervised' mode.
    self._generate_artificial_anomalies_train_set(
        supervise_mode if not online_supervision else 'unsupervised', noise_mode,
        oe_limit, train_set, normal_class
    )

    self._test_set = MyPascalVoc(root=self.root, split='val', download=True, nominal_label=self.nominal_label,
                                 transform=test_transform, anomlbl=self.anomalous_label)
def __init__(self, root: str, normal_class: int, preproc: str, nominal_label: int,
             supervise_mode: str, noise_mode: str, oe_limit: int, online_supervision: bool,
             logger: Logger = None, raw_shape: int = 240):
    """
    AD dataset for MVTec-AD. If no MVTec data is found in the root directory,
    the data is downloaded and processed to be stored in torch tensors with appropriate size (defined in raw_shape).
    This speeds up data loading at the start of training.

    :param root: root directory where data is found or is to be downloaded to
    :param normal_class: the class considered nominal
    :param preproc: the kind of preprocessing pipeline; one of 'lcn', '' / None / 'default' / 'none',
        'aug1', 'lcnaug1'
    :param nominal_label: the label that marks nominal samples in training.
        The scores in the heatmaps always rate label 1, thus usually the nominal label is 0,
        s.t. the scores are anomaly scores.
    :param supervise_mode: the type of generated artificial anomalies.
        See :meth:`fcdd.datasets.bases.TorchvisionDataset._generate_artificial_anomalies_train_set`.
    :param noise_mode: the type of noise used, see :mod:`fcdd.datasets.noise_mode`.
    :param oe_limit: limits the number of different anomalies in case of Outlier Exposure (defined in noise_mode)
    :param online_supervision: whether to sample anomalies online in each epoch,
        or offline before training (same for all epochs in this case).
    :param logger: logger
    :param raw_shape: the height and width of the raw MVTec images before passed through the preprocessing pipeline.
    """
    super().__init__(root, logger=logger)

    self.n_classes = 2  # 0: normal, 1: outlier
    self.shape = (3, 224, 224)
    self.raw_shape = (3,) + (raw_shape, ) * 2
    self.normal_classes = tuple([normal_class])
    self.outlier_classes = list(range(0, 15))
    self.outlier_classes.remove(normal_class)
    assert nominal_label in [0, 1], 'GT maps are required to be binary!'
    self.nominal_label = nominal_label
    self.anomalous_label = 1 if self.nominal_label == 0 else 0

    # per class: [per-channel minima, per-channel maxima] after gcn l1 norm has been applied
    lcn_range = [
        [(-1.3336724042892456, -1.3107913732528687, -1.2445921897888184),
         (1.3779616355895996, 1.3779616355895996, 1.3779616355895996)],
        [(-2.2404820919036865, -2.3387579917907715, -2.2896201610565186),
         (4.573435306549072, 4.573435306549072, 4.573435306549072)],
        [(-3.184587001800537, -3.164201259613037, -3.1392977237701416),
         (1.6995097398757935, 1.6011602878570557, 1.5209171772003174)],
        [(-3.0334954261779785, -2.958242416381836, -2.7701096534729004),
         (6.503103256225586, 5.875098705291748, 5.814228057861328)],
        [(-3.100773334503174, -3.100773334503174, -3.100773334503174),
         (4.27892541885376, 4.27892541885376, 4.27892541885376)],
        [(-3.6565306186676025, -3.507692813873291, -2.7635035514831543),
         (18.966819763183594, 21.64590072631836, 26.408710479736328)],
        [(-1.5192601680755615, -2.2068002223968506, -2.3948357105255127),
         (11.564697265625, 10.976534843444824, 10.378695487976074)],
        [(-1.3207964897155762, -1.2889339923858643, -1.148416519165039),
         (6.854909896850586, 6.854909896850586, 6.854909896850586)],
        [(-0.9883341193199158, -0.9822461605072021, -0.9288841485977173),
         (2.290637969970703, 2.4007883071899414, 2.3044068813323975)],
        [(-7.236185073852539, -7.236185073852539, -7.236185073852539),
         (3.3777384757995605, 3.3777384757995605, 3.3777384757995605)],
        [(-3.2036616802215576, -3.221003532409668, -3.305514335632324),
         (7.022546768188477, 6.115569114685059, 6.310940742492676)],
        [(-0.8915618658065796, -0.8669204115867615, -0.8002046346664429),
         (4.4255571365356445, 4.642300128936768, 4.305730819702148)],
        [(-1.9086798429489136, -2.0004451274871826, -1.929288387298584),
         (5.463134765625, 5.463134765625, 5.463134765625)],
        [(-2.9547364711761475, -3.17536997795105, -3.143850803375244),
         (5.305514812469482, 4.535006523132324, 3.3618252277374268)],
        [(-1.2906527519226074, -1.2906527519226074, -1.2906527519226074),
         (2.515115737915039, 2.515115737915039, 2.515115737915039)]
    ]

    # mean and std of original images per class
    class_mean = [
        (0.53453129529953, 0.5307118892669678, 0.5491130352020264),
        (0.326835036277771, 0.41494372487068176, 0.46718254685401917),
        (0.6953922510147095, 0.6663950085639954, 0.6533040404319763),
        (0.36377236247062683, 0.35087138414382935, 0.35671544075012207),
        (0.4484519958496094, 0.4484519958496094, 0.4484519958496094),
        (0.2390524297952652, 0.17620408535003662, 0.17206747829914093),
        (0.3919542133808136, 0.2631213963031769, 0.22006843984127045),
        (0.21368788182735443, 0.23478130996227264, 0.24079132080078125),
        (0.30240726470947266, 0.3029524087905884, 0.32861486077308655),
        (0.7099748849868774, 0.7099748849868774, 0.7099748849868774),
        (0.4567880630493164, 0.4711957275867462, 0.4482630491256714),
        (0.19987481832504272, 0.18578395247459412, 0.19361256062984467),
        (0.38699793815612793, 0.276934415102005, 0.24219433963298798),
        (0.6718143820762634, 0.47696375846862793, 0.35050269961357117),
        (0.4014520049095154, 0.4014520049095154, 0.4014520049095154)
    ]
    class_std = [
        (0.3667600452899933, 0.3666728734970093, 0.34991779923439026),
        (0.15321789681911469, 0.21510766446590424, 0.23905669152736664),
        (0.23858436942100525, 0.2591284513473511, 0.2601949870586395),
        (0.14506031572818756, 0.13994529843330383, 0.1276693195104599),
        (0.1636597216129303, 0.1636597216129303, 0.1636597216129303),
        (0.1688646823167801, 0.07597383111715317, 0.04383210837841034),
        (0.06069392338395119, 0.04061736911535263, 0.0303945429623127),
        (0.1602524220943451, 0.18222476541996002, 0.15336430072784424),
        (0.30409011244773865, 0.30411985516548157, 0.28656429052352905),
        (0.1337062269449234, 0.1337062269449234, 0.1337062269449234),
        (0.12076705694198608, 0.13341768085956573, 0.12879984080791473),
        (0.22920562326908112, 0.21501320600509644, 0.19536510109901428),
        (0.20621345937252045, 0.14321941137313843, 0.11695228517055511),
        (0.08259467780590057, 0.06751163303852081, 0.04756828024983406),
        (0.32304847240448, 0.32304847240448, 0.32304847240448)
    ]

    # ---- local builders for the pieces shared by several pipelines ----
    def lcn_steps():
        # LCN, then shift by the per-channel minimum and scale by the per-channel range.
        minima, maxima = lcn_range[normal_class]
        return [
            transforms.Lambda(lambda x: local_contrast_normalization(x, scale='l1')),
            transforms.Normalize(minima, [hi - lo for hi, lo in zip(maxima, minima)]),
        ]

    def standardize_steps():
        # Per-class mean/std standardization.
        return [transforms.Normalize(class_mean[normal_class], class_std[normal_class])]

    def tensor_only():
        # Joint image/ground-truth transform without any geometric change.
        return MultiCompose([
            transforms.ToTensor(),
        ])

    def crop_or_resize():
        # Joint train-time transform: randomly either crop to, or resize to, the input size.
        return MultiCompose([
            transforms.RandomChoice(
                [transforms.RandomCrop(self.shape[-1], padding=0),
                 transforms.Resize(self.shape[-1], Image.NEAREST)]
            ),
            transforms.ToTensor(),
        ])

    def resize_only():
        # Joint test-time transform: deterministic resize to the input size.
        return MultiCompose(
            [transforms.Resize(self.shape[-1], Image.NEAREST), transforms.ToTensor()]
        )

    def photometric_steps():
        # Image-only augmentations: one of two colour jitters, then Gaussian noise scaled by the
        # image std that is switched on or off by a random 0/1 factor; result clamped to [0, 1].
        return [
            transforms.ToPILImage(),
            transforms.RandomChoice([
                transforms.ColorJitter(0.04, 0.04, 0.04, 0.04),
                transforms.ColorJitter(0.005, 0.0005, 0.0005, 0.0005),
            ]),
            transforms.ToTensor(),
            transforms.Lambda(
                lambda x: (x + torch.randn_like(x).mul(np.random.randint(0, 2)).mul(x.std()).mul(0.1)).clamp(0, 1)
            ),
        ]

    # different types of preprocessing pipelines, 'lcn' is for using LCN, 'aug{X}' for augmentations
    img_gt_transform, img_gt_test_transform = None, None
    all_transform = []
    if preproc == 'lcn':
        assert self.raw_shape == self.shape, 'in case of no augmentation, raw shape needs to fit net input shape'
        img_gt_transform = img_gt_test_transform = tensor_only()
        test_transform = transform = transforms.Compose(lcn_steps())
    elif preproc in ['', None, 'default', 'none']:
        assert self.raw_shape == self.shape, 'in case of no augmentation, raw shape needs to fit net input shape'
        img_gt_transform = img_gt_test_transform = tensor_only()
        test_transform = transform = transforms.Compose(standardize_steps())
    elif preproc in ['aug1']:
        img_gt_transform = crop_or_resize()
        img_gt_test_transform = resize_only()
        test_transform = transforms.Compose(standardize_steps())
        transform = transforms.Compose([*photometric_steps(), *standardize_steps()])
    elif preproc in ['lcnaug1']:
        img_gt_transform = crop_or_resize()
        img_gt_test_transform = resize_only()
        test_transform = transforms.Compose(lcn_steps())
        transform = transforms.Compose([*photometric_steps(), *lcn_steps()])
    else:
        raise ValueError('Preprocessing pipeline {} is not known.'.format(preproc))

    # Maps the original class index to the binary nominal/anomalous label.
    target_transform = transforms.Lambda(
        lambda x: self.anomalous_label if x in self.outlier_classes else self.nominal_label
    )

    def build_train_set(joint_transform):
        # Training split; `enlarge` is taken from the ADMvTec class attribute.
        return MvTec(
            root=self.root, split='train', download=True,
            target_transform=target_transform,
            img_gt_transform=img_gt_transform, transform=transform, all_transform=joint_transform,
            shape=self.raw_shape, normal_classes=self.normal_classes,
            nominal_label=self.nominal_label, anomalous_label=self.anomalous_label,
            enlarge=ADMvTec.enlarge
        )

    def build_test_subset():
        # Test split whose targets are anomaly labels, restricted to the indices selected by
        # get_target_label_idx for the normal classes.
        full_test_set = MvTec(
            root=self.root, split='test_anomaly_label_target', download=True,
            target_transform=transforms.Lambda(
                lambda x: self.anomalous_label if x != MvTec.normal_anomaly_label_idx else self.nominal_label
            ),
            img_gt_transform=img_gt_test_transform, transform=test_transform, shape=self.raw_shape,
            normal_classes=self.normal_classes,
            nominal_label=self.nominal_label, anomalous_label=self.anomalous_label,
            enlarge=False
        )
        keep_idx = get_target_label_idx(full_test_set.targets.clone().data.cpu().numpy(), self.normal_classes)
        return GTSubset(full_test_set, keep_idx)

    if online_supervision:
        # order: target_transform -> all_transform -> img_gt transform -> transform
        assert supervise_mode not in ['supervised'], 'supervised mode works only offline'
        all_transform = MultiCompose([
            *all_transform,
            OnlineSupervisor(self, supervise_mode, noise_mode, oe_limit),
        ])
        train_set = build_train_set(all_transform)
        self._train_set = GTSubset(
            train_set, get_target_label_idx(train_set.targets.clone().data.cpu().numpy(), self.normal_classes)
        )
        self._test_set = build_test_subset()
    else:
        all_transform = MultiCompose([
            *all_transform,
        ]) if len(all_transform) > 0 else None
        train_set = build_train_set(all_transform)
        self._test_set = build_test_subset()
        # NOTE(review): source indentation was lost; offline generation is assumed to belong to
        # this branch only, since the online branch assigns self._train_set itself.
        self._generate_artificial_anomalies_train_set(supervise_mode, noise_mode, oe_limit, train_set, normal_class)
def __init__(self, size: torch.Size, clsses: List[int], root: str = None, limit_var: int = np.inf,
             limit_per_anomaly=True, download=True, logger: Logger = None, gt=False, remove_nominal=True):
    """
    Outlier Exposure dataset for MVTec-AD. Considers only a part of the classes.

    :param size: size of the samples in n x c x h x w, samples will be resized to h x w. If n is larger than the
        number of samples available in MVTec-AD, dataset will be enlarged by repetitions to fit n.
        This is important as exactly n images are extracted per iteration of the data_loader.
        For online supervision n should be set to 1 because only one sample is extracted at a time.
    :param clsses: the classes that are to be considered, i.e. all other classes are dismissed.
    :param root: root directory where data is found or is to be downloaded to.
    :param limit_var: limits the number of different samples, i.e. randomly chooses limit_var many samples
        from all available ones to be the training data. Defaults to infinity (no limit);
        None disables limiting as well.
    :param limit_per_anomaly: whether limit_var limits the number of different samples per type
        of defection or overall.
    :param download: whether to download the data if it is not found in root.
    :param logger: logger.
    :param gt: whether ground-truth maps are to be included in the data.
    :param remove_nominal: whether nominal samples are to be excluded from the data.
    :raises NotImplementedError: if fewer samples than ``size[0]`` remain after filtering.
    """
    assert len(size) == 4 and size[2] == size[3]
    assert size[1] in [1, 3]
    self.root = root
    self.logger = logger
    self.size = size
    self.use_gt = gt
    self.clsses = clsses
    super().__init__(root, 'test', download=download, shape=size[1:], logger=logger)

    self.img_gt_transform = MultiCompose([transforms.Resize((size[2], size[2])), transforms.ToTensor()])
    self.picks = get_target_label_idx(self.targets, self.clsses)
    if remove_nominal:
        # reshape(-1) instead of squeeze(): with exactly one match squeeze() yields a 0-d tensor,
        # tolist() then returns a bare int and building a set from it raises TypeError.
        anomalous_idx = (self.anomaly_labels != self.normal_anomaly_label_idx).nonzero().reshape(-1).tolist()
        self.picks = sorted(set(self.picks).intersection(anomalous_idx))

    if limit_per_anomaly and limit_var is not None:
        new_picks = []
        current_picks = set(self.picks)  # loop-invariant, built once
        for anomaly_label in set(self.anomaly_labels.tolist()):
            label_idx = (self.anomaly_labels == anomaly_label).nonzero().reshape(-1).tolist()
            linclsses = list(set.intersection(current_picks, set(label_idx)))
            if len(linclsses) == 0:
                continue
            if limit_var < len(linclsses):
                new_picks.extend(np.random.choice(linclsses, size=limit_var, replace=False))
            else:
                self.logprint(
                    'OEMvTec shall be limited to {} samples per anomaly label, '
                    'but MvTec anomaly label {} contains only {} samples, thus using all.'
                    .format(limit_var, self.anomaly_label_strings[anomaly_label], len(linclsses)), fps=False
                )
                new_picks.extend(linclsses)
        self.picks = sorted(new_picks)
    else:
        if limit_var is not None and limit_var < len(self):
            self.picks = np.random.choice(self.picks, size=limit_var, replace=False)
        if limit_var is not None and limit_var > len(self):
            self.logprint(
                'OEMvTec shall be limited to {} samples, but MvTec contains only {} samples, thus using all.'
                .format(limit_var, len(self))
            )

    # Enlarging the dataset by repetition to reach n samples is not implemented.
    if len(self) < size[0]:
        raise NotImplementedError()