def _compute_statistics(self):
    """Compute per-dataset DCT statistics and differences against a reference.

    For each entry in ``self.datasets`` (encoded as ``"path,name"``) this
    loads up to ``self.amount`` images, transforms them with a 2D DCT and
    accumulates:

    * log-scaled mean and standard deviation (appended to ``self.means`` /
      ``self.stds`` as ``(label, array)`` tuples), and
    * the absolute difference of log-scaled means w.r.t. the FIRST dataset,
      which becomes the reference (``self.ref_mean`` / ``self.ref_std``).
    """
    for encoded in self.datasets:
        dataset, name = encoded.split(",")
        dataset = image_paths(dataset)
        dataset = dataset[:self.amount]
        # lazy pipeline: images are loaded and DCT-transformed on demand
        images = map(lambda d: load_image(d, grayscale=not self.color), dataset)
        images_dct = map(dct2, images)

        # simple base statistics; color images need the per-channel variant
        if self.color:
            mean, variance = welford_multidimensional(images_dct)
        else:
            mean, variance = welford(images_dct)
        std = np.sqrt(variance)

        self.means.append((f"mean_{name}", log_scale(mean)))
        self.stds.append((f"std_{name}", log_scale(std)))

        # the first dataset seen becomes the reference for difference stats
        if self.ref_mean is None:
            self.ref_mean = mean
            self.ref_std = std
            continue

        # Other statistics calculated in reference to ref stats
        # mean difference
        mean_diff = np.abs(
            log_scale(np.abs(self.ref_mean)) - log_scale(np.abs(mean)))
        # BUG FIX: label typo corrected ("differnce" -> "difference")
        self.mean_differences.append((f"mean_difference_{name}", mean_diff))
def test_welford():
    """welford() must reproduce numpy's mean and variance along axis 0."""
    for _ in range(1_000):
        samples = np.random.randn(5, 5, 1_000)
        est_mean, est_var = welford(samples)
        assert np.allclose(samples.mean(axis=0), est_mean)
        assert np.allclose(samples.var(axis=0), est_var)
def main(args): output = f"{args.DIRECTORY.rstrip('/')}" # we always load images into numpy arrays # we additionally set a flag if we later convert to tensorflow records load_function = functools.partial(load_image, tf=args.mode == "tfrecords") transformation_function = None normalize_function = None if args.color: load_function = functools.partial(load_function, grayscale=False) output += "_color" # dct or raw image data? if args.raw: output += "_raw" # normalization sclaes to [-1, 1] if args.normalize: normalize_function = scale_image output += "_normalized" else: output += "_dct" transformation_function = _dct2_wrapper if args.log: # log sclae only for dct coefficients assert args.raw is False transformation_function = functools.partial(_dct2_wrapper, log=True) output += "_log_scaled" if args.normalize: # normalize to zero mean and unit variance train, _, _ = collect_all_paths(args.DIRECTORY) images = map(lambda x: x[0], train) images = map(load_function, images) images = map(transformation_function, images) mean, var = welford(images) std = np.sqrt(var) output += "_normalized" normalize_function = functools.partial(normalize, mean=mean, std=std) encode_function = functools.partial( convert_images, load_function=load_function, transformation_function=transformation_function, normalize_function=normalize_function) if args.mode == "normal": normal_mode(args.DIRECTORY, encode_function, output) elif args.mode == "tfrecords": tfmode(args.DIRECTORY, encode_function, output)
def main(args):
    """Compute mean and variance of DCT coefficients over the first
    ``args.AMOUNT`` images of each dataset and save them to ``args.output``.

    Writes ``mean.npy`` and ``var.npy`` into the output directory.
    """
    paths = list()
    for data in args.DATASETS:
        paths += image_paths(data)[:args.AMOUNT]

    # lazy pipeline: load -> 2D DCT -> streaming Welford statistics
    images = map(load_image, paths)
    images = map(dct2, images)
    mean, var = welford(images)

    os.makedirs(args.output, exist_ok=True)
    # BUG FIX: np.save accepts a path and opens/closes the file itself;
    # the original passed open(...) handles that were never closed.
    np.save(f"{args.output}/mean.npy", mean)
    np.save(f"{args.output}/var.npy", var)
def main(args):
    """Convert an image directory into a (optionally DCT-transformed,
    absolute-value-scaled and normalized) dataset in numpy or tfrecord format.

    The output directory name is built up as a suffix chain encoding the
    selected preprocessing options.
    """
    output = f"{args.DIRECTORY.rstrip('/')}"

    # we always load images into numpy arrays
    # we additionally set a flag if we later convert to tensorflow records
    load_function = functools.partial(load_image, tf=args.mode == "tfrecords")
    transformation_function = None
    normalize_function = None
    absolute_function = None

    if args.color:
        load_function = functools.partial(load_function, grayscale=False)
        output += "_color"

    # dct or raw image data?
    if args.raw:
        output += "_raw"
        # normalization scales to [-1, 1]
        if args.normalize:
            normalize_function = scale_image
            output += "_normalized"
    else:
        output += "_dct"
        transformation_function = _dct2_wrapper

        if args.log:
            # log scale only for dct coefficients
            assert args.raw is False
            transformation_function = functools.partial(_dct2_wrapper, log=True)
            output += "_log_scaled"

        if args.abs:
            # scale every coefficient by the largest absolute value observed
            # over a subsample of the training split
            train, _, _ = collect_all_paths(args.DIRECTORY)
            # BUG FIX: slice bounds must be integers — the original used
            # TRAIN_SIZE * len(args.DIRECTORY) * 0.1 (a float), which raises
            # "TypeError: slice indices must be integers" at runtime.
            train = train[:int(TRAIN_SIZE * len(args.DIRECTORY) * 0.1)]
            images = map(lambda x: x[0], train)
            images = map(load_function, images)
            images = map(transformation_function, images)

            first = next(images)
            current_max = np.absolute(first)
            for data in images:
                # element-wise running maximum of |coefficients|
                # (replaces the original's equivalent mask arithmetic)
                current_max = np.maximum(current_max, np.absolute(data))

            def scale_by_absolute(image):
                return image / current_max

            absolute_function = scale_by_absolute

        if args.normalize:
            # normalize to zero mean and unit variance
            train, _, _ = collect_all_paths(args.DIRECTORY)
            images = map(lambda x: x[0], train)
            images = map(load_function, images)
            images = map(transformation_function, images)
            if absolute_function is not None:
                images = map(absolute_function, images)

            mean, var = welford(images)
            std = np.sqrt(var)
            output += "_normalized"

            normalize_function = functools.partial(normalize, mean=mean, std=std)

    # bundle the configured stages into a single per-image encoder
    encode_function = functools.partial(
        convert_images,
        load_function=load_function,
        transformation_function=transformation_function,
        normalize_function=normalize_function,
        absolute_function=absolute_function)

    if args.mode == "normal":
        normal_mode(args.DIRECTORY, encode_function, output)
    elif args.mode == "tfrecords":
        tfmode(args.DIRECTORY, encode_function, output)