def read_and_augment(keys):
    augmenter = Augmenter()

    images = np.zeros((len(keys), CHANNELS, PIXELS, PIXELS), dtype=np.float32)

    for i, key in enumerate(keys):
        image = scipy.misc.imread(IMAGE_SOURCE + "/" + 'train' + "/" + key + ".jpeg").transpose(2, 0, 1)
        image = image / 256.0
        images[i] = image

    return augmenter.augment(images)
class AugmentingParallelBatchIterator(ParallelBatchIterator):
    """
    Randomly changes images in the batch. Behaviour can be defined in params.py.
    """
    def __init__(self, keys, batch_size, std, mean, coates_features=None, y_all=None, n_eyes=1):
        super(AugmentingParallelBatchIterator, self).__init__(keys, batch_size, std, mean, coates_features, y_all, n_eyes=n_eyes)

        # Initialize augmenter
        self.augmenter = Augmenter()

    def transform(self, Xb, yb):
        Xbb = self.augmenter.augment(Xb)

        # Do normalization in super-method
        Xbb, yb = super(AugmentingParallelBatchIterator, self).transform(Xbb, yb)

        return Xbb, yb
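The iterator augments first and then delegates normalization to its parent's transform. A minimal, self-contained sketch of that augment-then-normalize delegation pattern, using stub classes that are purely illustrative and are not the project's ParallelBatchIterator:

import numpy as np

# Illustrative stubs only: they mimic the augment-then-normalize ordering above,
# not the real ParallelBatchIterator API.
class _StubParentIterator(object):
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def transform(self, Xb, yb):
        # Normalization step handled by the parent class.
        return (Xb - self.mean) / self.std, yb


class _StubAugmentingIterator(_StubParentIterator):
    def transform(self, Xb, yb):
        # Stand-in augmentation; the real class calls Augmenter.augment(Xb).
        Xb = Xb + np.random.uniform(-0.05, 0.05, size=Xb.shape)
        return super(_StubAugmentingIterator, self).transform(Xb, yb)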
class DatasetManager():
    def __init__(self, cfg: DatasetConfig):
        self.cfg = cfg
        self.augmenter = Augmenter(cfg.augmenter_config)

    def copy_datasets(self):
        for dataset in self.cfg.datasets:
            shutil.copytree(dataset.src, dataset.dst, dirs_exist_ok=True)

    def augment_datasets(self):
        for dataset in self.cfg.datasets:
            img_target = dataset.train_target + dataset.val_target
            self.augmenter.process_recursive_directories(
                dataset.src, dataset.dst / 'aug', img_target, self.cfg.cpu_threads)

    def split_datasets(self):
        for dataset in self.cfg.datasets:
            split_dataset(dataset.dst / 'aug', dataset.train_target,
                          dataset.dst / 'train', dataset.dst / 'val')
            if not self.cfg.delete_individual_dsts:
                shutil.rmtree(dataset.dst / 'aug')

    def merge_datasets(self):
        for dataset in self.cfg.datasets:
            shutil.move(dataset.dst / 'train', self.cfg.dst / 'train')
            shutil.move(dataset.dst / 'val', self.cfg.dst / 'val')

    def process_asm(self):
        self.augment_datasets()
        logging.info('Augmenting done! Splitting...')
        self.split_datasets()
        logging.info('Splitting done! Merging...')
        self.merge_datasets()
        logging.info('Merging done! Have a good day!')

    def process_sam(self):
        logging.info('SAM (split-augment-merge) processing...')
        for dataset in self.cfg.datasets:
            ratio = dataset.train_target / (dataset.train_target + dataset.val_target)
            logging.info(f'{dataset.name} dataset train/val ratio: {ratio}')
            split_dataset_by_ratio(dataset.src, ratio,
                                   dataset.dst / 'train', dataset.dst / 'val')
        logging.info('Splitting done! Augmenting...')
        for dataset in self.cfg.datasets:
            self.augmenter.process_recursive_directories(
                dataset.src, dataset.dst / 'aug' / 'train',
                dataset.train_target, self.cfg.cpu_threads)
            self.augmenter.process_recursive_directories(
                dataset.src, dataset.dst / 'aug' / 'val',
                dataset.val_target, self.cfg.cpu_threads)
        logging.info('Augmenting done! Merging...')
        dst = (self.cfg.dst if not self.cfg.create_resized_versions
               else self.cfg.dst / f'orig_{self.cfg.augmenter_config.width}x{self.cfg.augmenter_config.height}')
        for dataset in self.cfg.datasets:
            shutil.copytree(dataset.dst / 'aug' / 'train', dst / 'train',
                            dirs_exist_ok=True, copy_function=copy_wrapper)
            shutil.copytree(dataset.dst / 'aug' / 'val', dst / 'val',
                            dirs_exist_ok=True, copy_function=copy_wrapper)
        if self.cfg.create_resized_versions:
            logging.info('Merging done! Resizing...')
            sizes = list(map(int, self.cfg.size_list))
            resize_dataset(dst / 'train', self.cfg.dst, sizes, prefix='train')
            resize_dataset(dst / 'val', self.cfg.dst, sizes, prefix='val')
            logging.info('Resizing done! Cleaning up...')
        else:
            logging.info('Merging done! Cleaning up...')
        if not self.cfg.delete_individual_dsts:
            logging.info('Merging/resizing done! Have a good day!')
            return
        for dataset in self.cfg.datasets:
            shutil.rmtree(dataset.dst)
        logging.info('Cleaning done! Have a good day!')
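DatasetManager reads a number of fields off its config objects; the shape it expects can be inferred from the attribute accesses above. A hedged sketch of that config, assuming dataclasses; the field names mirror the usages in the class, while the types and defaults are guesses:

from dataclasses import dataclass, field
from pathlib import Path
from typing import List

# Sketch of the config implied by DatasetManager's attribute accesses.
# Field names mirror the usages above; types and defaults are assumptions.
@dataclass
class DatasetEntry:
    name: str
    src: Path
    dst: Path
    train_target: int  # number of augmented training images to produce
    val_target: int    # number of augmented validation images to produce


@dataclass
class DatasetConfig:
    datasets: List[DatasetEntry]
    dst: Path
    augmenter_config: object  # passed to Augmenter(); also read for .width/.height
    cpu_threads: int = 4
    delete_individual_dsts: bool = True
    create_resized_versions: bool = False
    size_list: List[int] = field(default_factory=list)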
print "Defining network" network = define_network(inputs) print "Defining loss function" loss, val_fn = define_loss(network, targets) print "Defining learning function" train_fn = define_learning(network, loss) print "Loading data" train_X, train_y, val_X, val_y, test_X, test_y, label_to_names = data.load() print "Determining mean and std of train set" mean, std = normalize.calc_mean_std(train_X) a = Augmenter(multiprocess=True) # The number of epochs specifies the number of passes over the whole training data # Depending on augmentation settings, it still improves through epoch 100.. num_epochs = 100 #Take subset? Speeds it up x2, but worse performance ofc #train_X = train_X[:20000] #train_y = train_y[:20000] if params.HISTOGRAM_EQUALIZATION: adaptive = params.CLAHE print "Performing equalization (adaptive={})".format(adaptive) train_X = histogram_equalization(train_X, adaptive) val_X = histogram_equalization(val_X, adaptive) test_X = histogram_equalization(test_X, adaptive)
class TTABatchIterator(ParallelBatchIterator):
    def __init__(self, keys, batch_size, std, mean, coates_features=None, cv=False, n_eyes=1):
        super(TTABatchIterator, self).__init__(
            keys, batch_size, std, mean,
            coates_features=coates_features, test=True, cv=cv, n_eyes=n_eyes)

        # Initialize augmenter
        self.augmenter = Augmenter()

        self.i = 0

        # Deterministic test-time augmentations: every rotation/flip combination.
        self.rotations = [0, 45, 90, 135, 180, 225, 270, 315]
        self.flips = [True, False]
        self.hue = [0]
        self.saturation = [0]

        self.ttas = len(self.rotations) * len(self.flips) * len(self.hue) * len(self.saturation)
        self.n_eyes = n_eyes

    def transform(self, Xb, yb):
        if params.MULTIPROCESS:
            print "Batch %i/%i" % (self.i, self.X.shape[0] / self.batch_size / params.N_PRODUCERS)
        else:
            print "Batch %i/%i (%.2f%%)" % (
                self.i,
                self.X.shape[0] / self.batch_size * self.n_eyes,
                float(self.i) / (self.X.shape[0] / self.batch_size * self.n_eyes) * 100)

        self.i += 1

        Xbb_list = []

        for h in self.hue:
            for s in self.saturation:
                for r in self.rotations:
                    for f in self.flips:
                        Xbb_new = self.augmenter.augment_with_params(Xb, 0, 0, r, f, 1, h, s, 0)

                        # Normalize
                        Xbb_new, _ = super(TTABatchIterator, self).transform(Xbb_new, None)

                        # Pad with zeros if the last batch is smaller than batch_size
                        if Xbb_new.shape[0] < self.batch_size:
                            Xbb_new = np.vstack([
                                Xbb_new,
                                np.zeros((self.batch_size - Xbb_new.shape[0],
                                          Xbb_new.shape[1], Xbb_new.shape[2], Xbb_new.shape[3]),
                                         dtype=np.float32)
                            ])

                        Xbb_list.append(Xbb_new)

        # yb are the keys of this batch, in order.
        return np.vstack(Xbb_list), yb
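For each incoming batch, the iterator above emits one full copy per rotation/flip combination, stacked augmentation-first, and self.ttas counts those copies (16 with the lists above). A minimal sketch of regrouping the model's outputs back to one prediction per image; the predictions array and helper name are illustrative, not taken from the source:

import numpy as np

def average_tta_predictions(predictions, ttas, batch_size):
    # predictions: shape (ttas * batch_size, n_outputs), grouped augmentation-first,
    # i.e. the whole batch for the first rotation/flip combination comes first.
    n_outputs = predictions.shape[1]
    grouped = predictions.reshape(ttas, batch_size, n_outputs)
    # Average over the augmented copies, giving one prediction per original image.
    return grouped.mean(axis=0)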
    batch_size=32)

# Evaluate the trained network
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1),
                            target_names=binarizer.classes_))

model_file_name = "model_%s.h5" % round(time.time())
model.save(model_file_name)
print("Saved model to disk as %s" % model_file_name)


if __name__ == "__main__":
    # Train the model using the default image augmentation algorithms
    transformation_classes = [
        TransformOriginal, TransformFlip, TransformBlur, TransformSharpen,
        TransformBrighten, TransformBlurFlip, TransformSharpenFlip,
        TransformBrightenFlip
    ]

    db_path = "../../webserver/trainingdata.db"
    augmenter = Augmenter(path=db_path, transformation_classes=transformation_classes)
    data = augmenter.augment_data(save_text_output=False, save_histogram=False)

    try:
        train_model(data)
    except Exception:
        print("Training model failed.")
def __init__(self, mode='train_with_val', datasets=None, options=_DEFAULT_DS_TRAIN_OPTIONS):
    """Initialize the MixedDataset object

    Args:
        mode: Possible options: 'train_noval', 'val', 'train_with_val' or 'test'
        datasets: List of dataset objects to mix samples from
        options: see _DEFAULT_DS_TRAIN_OPTIONS comments
    """
    # Only options supported in this initial implementation
    assert mode in ['train_noval', 'val', 'train_with_val', 'test']
    self.mode = mode
    self.opts = options

    # Combine dataset fields
    self._trn_IDs, self._val_IDs, self._tst_IDs = [], [], []
    self._trn_IDs_simpl, self._val_IDs_simpl, self._tst_IDs_simpl = [], [], []
    self._img_trn_path, self._img_val_path, self._img_tst_path = [], [], []
    self._lbl_trn_path, self._lbl_val_path, self._pred_lbl_val_path, self._pred_lbl_tst_path = [], [], [], []
    self.min_flow = np.finfo(np.float32).max
    self.avg_flow = []
    self.max_flow = 0.
    for ds in datasets:
        if ds._trn_IDs is not None:
            self._trn_IDs.extend(ds._trn_IDs)
        if ds._val_IDs is not None:
            self._val_IDs.extend(ds._val_IDs)
        if ds._tst_IDs is not None:
            self._tst_IDs.extend(ds._tst_IDs)
        if ds._trn_IDs_simpl is not None:
            self._trn_IDs_simpl.extend(ds._trn_IDs_simpl)
        if ds._val_IDs_simpl is not None:
            self._val_IDs_simpl.extend(ds._val_IDs_simpl)
        if ds._tst_IDs_simpl is not None:
            self._tst_IDs_simpl.extend(ds._tst_IDs_simpl)
        if ds._img_trn_path is not None:
            self._img_trn_path.extend(ds._img_trn_path)
        if ds._img_val_path is not None:
            self._img_val_path.extend(ds._img_val_path)
        if ds._img_tst_path is not None:
            self._img_tst_path.extend(ds._img_tst_path)
        if ds._lbl_trn_path is not None:
            self._lbl_trn_path.extend(ds._lbl_trn_path)
        if ds._lbl_val_path is not None:
            self._lbl_val_path.extend(ds._lbl_val_path)
        if ds._pred_lbl_val_path is not None:
            self._pred_lbl_val_path.extend(ds._pred_lbl_val_path)
        if ds._pred_lbl_tst_path is not None:
            self._pred_lbl_tst_path.extend(ds._pred_lbl_tst_path)
        self.min_flow = min(self.min_flow, ds.min_flow)
        self.avg_flow.append(ds.avg_flow)
        self.max_flow = max(self.max_flow, ds.max_flow)

    # yes, this is only an approximation of the average...
    self.avg_flow = np.mean(self.avg_flow)

    # Load all data in memory, if requested
    if self.opts['in_memory']:
        self._preload_all_samples()

    # Shuffle the data and set trackers
    np.random.seed(self.opts['random_seed'])
    if self.mode in ['train_noval', 'train_with_val']:
        # Train over the original training set, in the first case
        self._trn_ptr = 0
        self.trn_size = len(self._trn_IDs)
        self._trn_idx = np.arange(self.trn_size)
        np.random.shuffle(self._trn_idx)
        if self.mode == 'train_with_val':
            # Train over the training split, validate over the validation split, in the second case
            self._val_ptr = 0
            self.val_size = len(self._val_IDs)
            self._val_idx = np.arange(self.val_size)
            np.random.shuffle(self._val_idx)
        if self.opts['tb_test_imgs'] is True:
            # Make test images available to the model in training mode
            self._tst_ptr = 0
            self.tst_size = len(self._tst_IDs)
            self._tst_idx = np.arange(self.tst_size)
            np.random.shuffle(self._tst_idx)

        # Instantiate augmenter, if requested
        if self.opts['aug_type'] is not None:
            assert self.opts['aug_type'] in ['basic', 'heavy']
            self._aug = Augmenter(self.opts)

    elif self.mode == 'val':
        # Validate over the validation split
        self._val_ptr = 0
        self.val_size = len(self._val_IDs)
        self._val_idx = np.arange(self.val_size)
        np.random.shuffle(self._val_idx)

    else:
        # Test over the entire testing set
        self._tst_ptr = 0
        self.tst_size = len(self._tst_IDs)
        self._tst_idx = np.arange(self.tst_size)
        np.random.shuffle(self._tst_idx)
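The option keys this constructor reads can be seen in the code above; a hedged sketch of what _DEFAULT_DS_TRAIN_OPTIONS might contain, limited to those keys, with placeholder values that are assumptions rather than the project's actual defaults:

# Only the keys referenced by the constructor above; values are placeholders.
_DEFAULT_DS_TRAIN_OPTIONS = {
    'in_memory': False,     # preload all samples into RAM via _preload_all_samples()
    'random_seed': 0,       # seed applied before shuffling the index arrays
    'tb_test_imgs': False,  # also expose test images while training
    'aug_type': 'basic',    # None, 'basic', or 'heavy'; the whole dict is passed to Augmenter()
}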