from pathlib import Path

import pandas as pd
from torch.utils.data import ConcatDataset
from torchvision import transforms

# Project-local modules (module paths assumed; adjust to this repo's layout).
import Constants
import preprocess
from dataset import AllDatasetsShared


def get_dataset(dfs_all, envs=(), split=None, only_frontal=True, imagenet_norm=True,
                augment=0, cache=False, subset_label=None):
    """Build a dataset from pre-split dataframes in `dfs_all`
    (environment name -> {'train'/'val'/'test' -> dataframe}).

    Note: `only_frontal` is accepted for API parity but unused here; the
    dataframes in `dfs_all` are assumed to be pre-filtered.
    """
    if split in ('val', 'test'):
        # no random augmentation on evaluation splits
        assert augment in (0, -1)

    if augment == 1:  # image augmentations
        image_transforms = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(size=224, scale=(0.75, 1.0)),
            transforms.ToTensor(),
        ]
    elif augment == 0:
        image_transforms = [transforms.ToTensor()]
    elif augment == -1:
        # only resize: return a dataset of PIL images, so no ToTensor()
        image_transforms = []
    else:
        raise ValueError(f'unsupported augment value: {augment}')

    if imagenet_norm and augment != -1:
        image_transforms.append(
            transforms.Normalize(Constants.IMAGENET_MEAN, Constants.IMAGENET_STD))

    datasets = []
    for e in envs:
        splits = [split] if split is not None else ['train', 'val', 'test']
        dfs = [dfs_all[e][i] for i in splits]

        for c, s in enumerate(splits):
            # note: the cache directory is keyed by environment only, so all
            # splits of an environment share one cache directory here
            cache_dir = Path(Constants.cache_dir) / f'{e}/'
            cache_dir.mkdir(parents=True, exist_ok=True)
            datasets.append(
                AllDatasetsShared(
                    dfs[c],
                    transform=transforms.Compose(image_transforms),
                    split=split,
                    cache=cache,
                    cache_dir=cache_dir,
                    subset_label=subset_label))

    if len(datasets) == 0:
        return None
    elif len(datasets) == 1:
        ds = datasets[0]
    else:
        ds = ConcatDataset(datasets)
        ds.dataframe = pd.concat([i.dataframe for i in datasets])
    return ds
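# Usage sketch (hypothetical names for illustration: the 'MIMIC' environment
# key and the exact structure of `dfs_all` are assumptions, not confirmed by
# this repo):
#
#   train_ds = get_dataset(dfs_all, envs=['MIMIC'], split='train', augment=1)
#   loader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)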
def get_dataset(envs=(), split=None, only_frontal=False, imagenet_norm=True,
                augment=0, cache=True, subset_label=None, augmented_dfs=None,
                output_type='normal', ifft_filter=None, pixel_thres=None,
                crop_patch_at_end=False, patched='none', patch_ind=None):
    """Build a dataset by loading and preprocessing each environment's
    dataframes from `Constants.df_paths`, unless `augmented_dfs` supplies
    them directly."""
    if augment == 1:  # normal image augmentation
        image_transforms = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.RandomResizedCrop(size=224, scale=(0.75, 1.0)),
            transforms.ToTensor(),
        ]
    elif augment == 0:
        image_transforms = [transforms.ToTensor()]
    elif augment == -1:
        # only resize: return a dataset of PIL images, so no ToTensor()
        image_transforms = []
    else:
        raise ValueError(f'unsupported augment value: {augment}')

    if imagenet_norm and augment != -1:
        image_transforms.append(
            transforms.Normalize(Constants.IMAGENET_MEAN, Constants.IMAGENET_STD))

    datasets = []
    for e in envs:
        func = preprocess.get_process_func(e)
        paths = Constants.df_paths[e]
        splits = [split] if split is not None else ['train', 'val', 'test']

        if augmented_dfs is not None:
            # use provided dataframes for subsample augmentation
            dfs = [augmented_dfs[e][i] for i in splits]
        else:
            dfs = [func(pd.read_csv(paths[i]), only_frontal) for i in splits]

        for c, s in enumerate(splits):
            # cache directory is keyed by environment and split
            cache_dir = Path(Constants.cache_dir) / f'{e}_{s}/'
            cache_dir.mkdir(parents=True, exist_ok=True)
            datasets.append(
                AllDatasetsShared(
                    dfs[c],
                    transform=transforms.Compose(image_transforms),
                    split=split,
                    cache=cache,
                    cache_dir=cache_dir,
                    subset_label=subset_label,
                    output_type=output_type,
                    ifft_filter=ifft_filter,
                    pixel_thres=pixel_thres,
                    crop_patch_at_end=crop_patch_at_end,
                    patched=patched,
                    patch_ind=patch_ind))

    if len(datasets) == 0:
        return None
    elif len(datasets) == 1:
        ds = datasets[0]
    else:
        ds = ConcatDataset(datasets)
        ds.dataframe = pd.concat([i.dataframe for i in datasets])
    return ds
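# Usage sketch (hypothetical: 'CXP' stands in for whatever environment keys
# actually exist in Constants.df_paths):
#
#   val_ds = get_dataset(envs=['CXP'], split='val', augment=0)
#   pil_ds = get_dataset(envs=['CXP'], split='test', augment=-1)  # PIL images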