import os
from multiprocessing import Pool

import data_util


def main(datadir, convert_dir, crop_size):
    # Create the output directory; ignore the error if it already exists.
    try:
        os.mkdir(convert_dir)
    except OSError:
        pass

    filenames = data_util.get_image_files(datadir)
    print('Resizing images in {} to {}'.format(datadir, convert_dir))

    # Resize in batches of 500 across a multiprocessing pool.
    n = len(filenames)
    batch_size = 500
    batches = n // batch_size + 1
    p = Pool()

    # Each work item pairs the resize function with its arguments;
    # the top-level worker `convert` unpacks and applies them.
    args = []
    for f in filenames:
        args.append((convert_size, (datadir, convert_dir, f, crop_size)))

    for i in range(batches):
        print('batch {:>2} / {}'.format(i + 1, batches))
        p.map(convert, args[i * batch_size : (i + 1) * batch_size])

    p.close()
    p.join()
    print('Done')
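# The worker `convert` and the resize function `convert_size` are defined
# elsewhere in this module. A minimal sketch of the worker, assuming each
# work item is a (function, args) tuple as built above -- this body is an
# assumption, not the module's actual implementation. Pool.map requires a
# picklable top-level function, which is why the pair is unpacked here
# rather than in a lambda.
def convert(item):
    fn, (datadir, convert_dir, fname, crop_size) = item
    return fn(datadir, convert_dir, fname, crop_size)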
import numpy as np
import matplotlib.pyplot as plt

import data_util

DATA_DIR = "converted"

files = data_util.get_image_files(DATA_DIR)
images = data_util.load_images(files)

# Per-channel statistics over the converted dataset.
MEAN = data_util.compute_mean(files)
STD = data_util.compute_std(files)

# Normalize each CHW image channel-wise: subtract the mean, divide by the std.
images_normalized = []
for img in images:
    img = img - MEAN[:, np.newaxis, np.newaxis]
    img = img / STD[:, np.newaxis, np.newaxis]
    images_normalized.append(img)
images_normalized = np.array(images_normalized)

# Augment both the raw and the normalized images.
original_augmented = data_util.parallel_augment(images)
normalized_augmented = data_util.parallel_augment(images_normalized)

# Pick one example of each and move it to HWC layout for plotting.
original = images[3]
normalized = images_normalized[3]
original = original.transpose(1, 2, 0)
normalized = normalized.transpose(1, 2, 0)
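# The transposes above put the samples in HWC order, which is what
# matplotlib's imshow expects, but the script stops before plotting.
# A hypothetical continuation -- the subplot layout and dtype handling
# are assumptions, not part of the original script:
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(original.astype(np.uint8))  # raw pixel values
ax1.set_title('original')
ax2.imshow(normalized)  # zero-mean / unit-std floats; imshow clips to [0, 1]
ax2.set_title('normalized')
plt.show()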
import data_util

DATA_DIR = '/nikel/dhpark/fundus/kaggle/original/training/train_medium'

files = data_util.get_image_files(DATA_DIR)

# Per-channel mean and std over every training image.
mean = data_util.compute_mean_across_channels(files)
std = data_util.compute_std_across_channels(files)
print("computing done")

# Persist the statistics via numpy's ndarray.dump, which pickles the array.
print("dumping...")
mean.dump("mean.dat")
std.dump("std.dat")
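# ndarray.dump writes a pickle of the array, so the files written above can
# be read back with numpy.load once allow_pickle is enabled. A small sketch:
import numpy as np

mean = np.load("mean.dat", allow_pickle=True)
std = np.load("std.dat", allow_pickle=True)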
from time import time

import numpy as np
from matplotlib import pyplot as plt

import data
import data_util

# Augmentation ranges for random zoom, rotation, shear, translation and flips.
aug_params = {
    'zoom_range': (1 / 1.15, 1.15),
    'rotation_range': (0, 360),
    'shear_range': (0, 0),
    'translation_range': (-40, 40),
    'do_flip': True,
    'allow_stretch': True,
}

files = data_util.get_image_files('testing')
X = data_util.load_images(files)
mean, std = data_util.compute_mean_and_std(files)
print(mean, std)
print("Number of images: {}".format(len(X)))

# Sequential baseline:
# start = time()
# result = data.batch_perturb_and_augment(X, 500, 500, aug_params=aug_params, sigma=0.5)
# end = time()
# print("Processing without parallelization took {} seconds".format(end - start))

# Parallel version:
# start = time()
# result = data.parallel_perturb_and_augment(X, 500, 500, aug_params=aug_params, sigma=0.5)
# result = data.parallel_perturb_and_augment(X, 500, 500)
# end = time()
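# As written, both timed paths above are commented out, so the script only
# prints the statistics. A hypothetical harness that revives the parallel
# path and inspects one output -- the call signature is copied from the
# comments above, and the CHW layout of `result` is an assumption:
start = time()
result = data.parallel_perturb_and_augment(X, 500, 500, aug_params=aug_params, sigma=0.5)
end = time()
print("Parallel augmentation took {:.1f} seconds".format(end - start))

plt.imshow(result[0].transpose(1, 2, 0).astype(np.uint8))  # CHW -> HWC
plt.title('augmented sample')
plt.show()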