def _compute_statistics(self):
    """Compute per-dataset DCT statistics and differences to a reference.

    Iterates over ``self.datasets`` (entries encoded as ``"<path>,<name>"``),
    loads up to ``self.amount`` images per dataset (grayscale unless
    ``self.color``), transforms them with ``dct2`` and accumulates streaming
    mean/std via Welford's algorithm. The first dataset becomes the
    reference (``self.ref_mean`` / ``self.ref_std``); later datasets also
    record their log-scaled absolute mean difference to that reference.

    Side effects: appends to ``self.means``, ``self.stds`` and
    ``self.mean_differences`` and sets ``self.ref_mean`` / ``self.ref_std``.
    """
    for encoded in self.datasets:
        dataset, name = encoded.split(",")
        dataset = image_paths(dataset)
        dataset = dataset[:self.amount]
        images = map(lambda d: load_image(d, grayscale=not self.color), dataset)
        images_dct = map(dct2, images)

        # Simple base statistics: streaming mean/variance over the spectra.
        if self.color:
            mean, variance = welford_multidimensional(images_dct)
        else:
            mean, variance = welford(images_dct)
        std = np.sqrt(variance)

        self.means.append((f"mean_{name}", log_scale(mean)))
        self.stds.append((f"std_{name}", log_scale(std)))

        # The first dataset seen serves as the reference for all
        # difference statistics below.
        if self.ref_mean is None:
            self.ref_mean = mean
            self.ref_std = std
            continue

        # Mean difference to the reference, compared in log space.
        mean_diff = np.abs(
            log_scale(np.abs(self.ref_mean)) - log_scale(np.abs(mean)))
        # FIX: label typo "mean_differnce" -> "mean_difference".
        self.mean_differences.append((f"mean_difference_{name}", mean_diff))
def _load_images(path, amount=None):
    """Load and process images found below *path* in parallel.

    Args:
        path: Directory searched for image files (via ``image_paths``).
        amount: Optional cap on the number of images to load; ``None``
            loads all of them.

    Returns:
        List of ``_process_image`` results, ordered like the paths.
    """
    paths = image_paths(path)
    if amount is not None:
        paths = paths[:amount]
    # FIX: the original never closed the pool, leaking worker processes.
    # The context manager terminates and reaps workers even on error.
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        images = pool.map(_process_image, paths)
    return images
def main(args):
    """Compute DCT mean/variance over the given datasets and save them.

    Collects up to ``args.AMOUNT`` image paths from each entry of
    ``args.DATASETS``, streams them through ``load_image`` and ``dct2``,
    accumulates mean/variance with Welford's algorithm, and writes the
    results to ``<args.output>/mean.npy`` and ``<args.output>/var.npy``.
    """
    paths = list()
    for data in args.DATASETS:
        paths += image_paths(data)[:args.AMOUNT]

    images = map(load_image, paths)
    images = map(dct2, images)
    mean, var = welford(images)

    os.makedirs(args.output, exist_ok=True)
    # FIX: the original passed open() file objects that were never closed.
    # np.save accepts a filename and handles opening/closing itself.
    np.save(f"{args.output}/mean.npy", mean)
    np.save(f"{args.output}/var.npy", var)
def _collect_image_paths(dirictory):
    """Split the sorted image paths of a directory into train/val/test.

    Returns a ``(train, val, test)`` tuple of path lists with exactly
    ``TRAIN_SIZE``, ``VAL_SIZE`` and ``TEST_SIZE`` entries; asserts that
    the directory holds enough images for all three splits.
    """
    all_paths = sorted(image_paths(dirictory))
    required = TRAIN_SIZE + VAL_SIZE + TEST_SIZE
    assert len(all_paths) >= required, f"{len(all_paths)} - {dirictory}"

    # Consecutive, non-overlapping slices of the sorted listing.
    first_cut = TRAIN_SIZE
    second_cut = TRAIN_SIZE + VAL_SIZE
    train_split = all_paths[:first_cut]
    val_split = all_paths[first_cut:second_cut]
    test_split = all_paths[second_cut:second_cut + TEST_SIZE]

    assert len(train_split) == TRAIN_SIZE, f"{len(train_split)} - {dirictory}"
    assert len(val_split) == VAL_SIZE, f"{len(val_split)} - {dirictory}"
    assert len(test_split) == TEST_SIZE, f"{len(test_split)} - {dirictory}"
    return (train_split, val_split, test_split)
def apply_transformation_to_datasets(datasets, mode, size):
    """Apply image perturbations to datasets and save the results as PNGs.

    Args:
        datasets: Iterable of dataset directory paths.
        mode: One of ``"noise"``, ``"blur"``, ``"jpeg"``, ``"cropping"`` or
            ``"combined"`` (cycles through all four transformations).
        size: Optional cap on the number of images processed per dataset.

    Raises:
        NotImplementedError: If *mode* is not one of the supported values.

    Each image is transformed with probability 0.5 and written to a sibling
    directory named ``<dir_path>_<mode>``.
    """
    if mode == "noise":
        image_functions = [noise]
    elif mode == "blur":
        image_functions = [blur]
    elif mode == "jpeg":
        image_functions = [jpeg]
    elif mode == "cropping":
        image_functions = [cropping]
    elif mode == "combined":
        image_functions = [noise, blur, jpeg, cropping]
    else:
        # FIX: the original string lacked the f-prefix, so the literal text
        # "{mode}" was printed instead of the offending value.
        raise NotImplementedError(f"Selected unrecognized mode: {mode}!")

    for dir_path in datasets:
        output_dir = f"{dir_path}_{mode}"
        os.makedirs(output_dir, exist_ok=True)

        paths = image_paths(dir_path)[:size]
        images = map(np.asarray, map(Image.open, paths))
        for i, image in enumerate(images):
            # Round-robin over the transformation list so "combined" mode
            # alternates between noise/blur/jpeg/cropping.
            current_function = image_functions.pop(0)
            new_image = image
            # Apply the transformation with 50% probability.
            if np.random.sample() > .5:
                new_image = current_function(new_image)
                # Sanity check: the transformation must actually change the
                # pixels. NOTE(review): assumes broadcast-compatible shapes
                # for shape-changing transforms (cropping) — confirm.
                assert not np.isclose(new_image, image).all()

            Image.fromarray(new_image).save(f"{output_dir}/{mode}_{i:06}.png")
            image_functions.append(current_function)

            print(
                f"\rConverted {i+1: 6} out of {len(paths) if size is None else max(len(paths), size)} images for {dir_path}!", end="")

        print(f"\nFinished converting {dir_path}!")