import numpy as np

# DenseDesignMatrixWrapper, SingleFoldSplitter, PreprocessedSplitter and
# to_4d_array come from the surrounding package (their imports are omitted
# here).


def test_preprocessed_splitter():
    class DemeanPreproc():
        """Just for tests :)"""
        def apply(self, dataset, can_fit=False):
            topo_view = dataset.get_topological_view()
            if can_fit:
                self.mean = np.mean(topo_view)
            dataset.set_topological_view(topo_view - self.mean)

    data = np.arange(10)
    dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data),
                                       y=np.zeros(10))
    splitter = SingleFoldSplitter(n_folds=10, i_test_fold=9)
    preproc_splitter = PreprocessedSplitter(dataset_splitter=splitter,
                                            preprocessor=DemeanPreproc())

    # First round: preprocessor is fitted on the train fold (mean 3.5),
    # then applied unchanged to the valid and test folds.
    first_round_sets = preproc_splitter.get_train_valid_test(dataset)
    train_topo = first_round_sets['train'].get_topological_view()
    valid_topo = first_round_sets['valid'].get_topological_view()
    test_topo = first_round_sets['test'].get_topological_view()
    assert np.array_equal(
        train_topo,
        to_4d_array([-3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5]))
    assert np.array_equal(valid_topo, to_4d_array([4.5]))
    assert np.array_equal(test_topo, to_4d_array([5.5]))

    # Second round: train and valid folds are merged (mean 4) before the
    # preprocessor is refitted.
    second_round_sets = preproc_splitter.get_train_merged_valid_test(dataset)
    train_topo = second_round_sets['train'].get_topological_view()
    valid_topo = second_round_sets['valid'].get_topological_view()
    test_topo = second_round_sets['test'].get_topological_view()
    assert np.array_equal(train_topo,
                          to_4d_array([-4, -3, -2, -1, 0, 1, 2, 3, 4]))
    assert np.array_equal(valid_topo, to_4d_array([4]))
    assert np.array_equal(test_topo, to_4d_array([5]))
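

# Illustrative, self-contained sketch of the fit-on-train contract the test
# above checks: preprocessing statistics are computed on the training fold
# only (can_fit=True) and then reused unchanged on the validation and test
# folds. The plain-array "splitter" and the helper name below are
# hypothetical stand-ins, not this package's DenseDesignMatrix-based classes.
def _fit_on_train_contract_sketch():
    def split(data, n_folds, i_test_fold):
        folds = np.array_split(data, n_folds)
        train = np.concatenate(
            [fold for i, fold in enumerate(folds)
             if i not in (i_test_fold, i_test_fold - 1)])
        return dict(train=train, valid=folds[i_test_fold - 1],
                    test=folds[i_test_fold])

    sets = split(np.arange(10), n_folds=10, i_test_fold=9)
    train_mean = np.mean(sets['train'])  # statistics from train only: 3.5
    demeaned = dict((name, arr - train_mean) for name, arr in sets.items())
    assert np.array_equal(demeaned['train'],
                          [-3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5])
    assert np.array_equal(demeaned['valid'], [4.5])  # 8 - 3.5
    assert np.array_equal(demeaned['test'], [5.5])   # 9 - 3.5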
import logging
from copy import deepcopy
from collections import OrderedDict

from numpy.random import RandomState
import theano
import theano.tensor as T
import lasagne

# PreprocessedSplitter, MonitorManager, RememberBest, Or, MaxEpochs,
# ChanBelow, layers_to_str and log_timing come from the surrounding package
# (their imports are omitted here).

log = logging.getLogger(__name__)


class Experiment(object):
    def __init__(self, final_layer, dataset, splitter, preprocessor,
                 iterator, loss_expression, updates_expression,
                 updates_modifier, monitors, stop_criterion,
                 remember_best_chan, run_after_early_stop,
                 batch_modifier=None):
        self.final_layer = final_layer
        self.dataset = dataset
        self.dataset_provider = PreprocessedSplitter(splitter, preprocessor)
        self.preprocessor = preprocessor
        self.iterator = iterator
        self.loss_expression = loss_expression
        self.updates_expression = updates_expression
        self.updates_modifier = updates_modifier
        self.monitors = monitors
        self.stop_criterion = stop_criterion
        self.monitor_manager = MonitorManager(monitors)
        self.remember_extension = RememberBest(remember_best_chan)
        self.run_after_early_stop = run_after_early_stop
        self.batch_modifier = batch_modifier

    def setup(self, target_var=None):
        lasagne.random.set_rng(RandomState(9859295))
        self.dataset.ensure_is_loaded()
        self.print_layer_sizes()
        log.info("Create theano functions...")
        self.create_theano_functions(target_var)
        # reset remember-best extension in case you rerun some experiment
        self.remember_extension = RememberBest(
            self.remember_extension.chan_name)
        log.info("Done.")

    def print_layer_sizes(self):
        log.info("Layers...")
        # start on a newline so everything starts from the left end of the
        # terminal, including the input layer string
        log.info('\n' + layers_to_str(self.final_layer))

    def create_theano_functions(self, target_var,
                                deterministic_training=False):
        if target_var is None:
            if hasattr(self.dataset, 'get_dummy_y'):
                log.info("Use dataset-supplied dummy y to determine "
                         "shape and type of target variable")
                dummy_y = self.dataset.get_dummy_y()
                # tensor with as many dimensions as y
                target_type = T.TensorType(
                    dtype=dummy_y.dtype,
                    broadcastable=[False] * len(dummy_y.shape))
                target_var = target_type()
            else:
                log.info("Automatically determine size of target variable "
                         "by example...")
                # get a dummy batch and determine the target size;
                # use the test set since it is smaller,
                # maybe memory is freed quicker
                # prevent reloading at this step
                was_reloadable = self.dataset.reloadable
                self.dataset.reloadable = False
                test_set = self.dataset_provider.get_train_valid_test(
                    self.dataset)['test']
                self.dataset.reloadable = was_reloadable
                batches = self.iterator.get_batches(test_set, shuffle=False)
                dummy_batch = next(batches)
                dummy_y = dummy_batch[1]
                del test_set
                # tensor with as many dimensions as y
                target_type = T.TensorType(
                    dtype=dummy_y.dtype,
                    broadcastable=[False] * len(dummy_y.shape))
                target_var = target_type()
                self.dataset.ensure_is_loaded()
        prediction = lasagne.layers.get_output(
            self.final_layer, deterministic=deterministic_training)
        # "test" as in during testing, not as in "test set"
        test_prediction = lasagne.layers.get_output(self.final_layer,
                                                    deterministic=True)
        # Loss function might need layers or not...
        try:
            loss = self.loss_expression(prediction, target_var).mean()
            test_loss = self.loss_expression(test_prediction,
                                             target_var).mean()
        except TypeError:
            loss = self.loss_expression(prediction, target_var,
                                        self.final_layer).mean()
            test_loss = self.loss_expression(test_prediction, target_var,
                                             self.final_layer).mean()
        # create parameter update expressions
        params = lasagne.layers.get_all_params(self.final_layer,
                                               trainable=True)
        updates = self.updates_expression(loss, params)
        if self.updates_modifier is not None:
            # put norm constraints on all layers; for now fixed to
            # max kernel norm 2 and max col norm 0.5
            updates = self.updates_modifier.modify(updates, self.final_layer)
        input_var = lasagne.layers.get_all_layers(
            self.final_layer)[0].input_var
        # Store all parameters, including update params like adam params,
        # needed for resetting to the best model after early stop.
        # Not sure why I am not only doing update params below,
        # possibly because batch norm is not in update params?
        all_layer_params = lasagne.layers.get_all_params(self.final_layer)
        self.all_params = all_layer_params
        # now params from adam would still be missing... add them ...
        all_update_params = updates.keys()
        for param in all_update_params:
            if param not in self.all_params:
                self.all_params.append(param)

        self.train_func = theano.function([input_var, target_var],
                                          updates=updates)
        self.monitor_manager.create_theano_functions(
            input_var, target_var, test_prediction, test_loss)

    def run(self):
        log.info("Run until first stop...")
        self.run_until_early_stop()
        # always set up for the second stop, in order to get the best model,
        # even if not running after early stop...
        log.info("Setup for second stop...")
        self.setup_after_stop_training()
        if self.run_after_early_stop:
            log.info("Run until second stop...")
            self.run_until_second_stop()
            self.readd_old_monitor_chans()

    def run_until_early_stop(self):
        log.info("Split/Preprocess datasets...")
        datasets = self.dataset_provider.get_train_valid_test(self.dataset)
        log.info("...Done")
        self.create_monitors(datasets)
        self.run_until_stop(datasets, remember_best=True)
        return datasets

    def run_until_stop(self, datasets, remember_best):
        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                                                   self.all_params)
        self.iterator.reset_rng()
        while not self.stop_criterion.should_stop(self.monitor_chans):
            self.run_one_epoch(datasets, remember_best)

    def run_one_epoch(self, datasets, remember_best):
        batch_generator = self.iterator.get_batches(datasets['train'],
                                                    shuffle=True)
        with log_timing(log, None,
                        final_msg='Time updates following epoch:'):
            for inputs, targets in batch_generator:
                if self.batch_modifier is not None:
                    inputs, targets = self.batch_modifier.process(inputs,
                                                                  targets)
                # could happen that the batch modifier has removed all inputs
                if len(inputs) > 0:
                    self.train_func(inputs, targets)
        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                                                   self.all_params)

    def setup_after_stop_training(self):
        # also remember the old monitor chans; they will be put back into
        # the monitor chans after the experiment has finished
        self.old_monitor_chans = deepcopy(self.monitor_chans)
        self.remember_extension.reset_to_best_model(self.monitor_chans,
                                                    self.all_params)
        loss_to_reach = self.monitor_chans['train_loss'][-1]
        self.stop_criterion = Or(stop_criteria=[
            MaxEpochs(num_epochs=self.remember_extension.best_epoch * 2),
            ChanBelow(chan_name='valid_loss', target_value=loss_to_reach)])
        log.info("Train loss to reach {:.5f}".format(loss_to_reach))

    def run_until_second_stop(self):
        datasets = self.dataset_provider.get_train_merged_valid_test(
            self.dataset)
        self.run_until_stop(datasets, remember_best=False)

    def create_monitors(self, datasets):
        self.monitor_chans = OrderedDict()
        self.last_epoch_time = None
        for monitor in self.monitors:
            monitor.setup(self.monitor_chans, datasets)

    def monitor_epoch(self, all_datasets):
        self.monitor_manager.monitor_epoch(self.monitor_chans, all_datasets,
                                           self.iterator)

    def print_epoch(self):
        # -1 due to doing one monitor pass at the start of training
        i_epoch = len(list(self.monitor_chans.values())[0]) - 1
        log.info("Epoch {:d}".format(i_epoch))
        for chan_name in self.monitor_chans:
            log.info("{:25s} {:.5f}".format(
                chan_name, self.monitor_chans[chan_name][-1]))
        log.info("")

    def readd_old_monitor_chans(self):
        for key in self.old_monitor_chans:
            new_key = 'before_reset_' + key
            self.monitor_chans[new_key] = self.old_monitor_chans[key]
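

# Self-contained sketch of the loss-expression dispatch used in
# create_theano_functions above: a loss expression may take
# (prediction, targets) or, when it also needs the network (e.g. for a
# regularization term), (prediction, targets, final_layer); the TypeError
# fallback picks whichever signature applies. The function and example
# losses below are illustrative names, not part of this package.
def _dispatch_loss(loss_expression, prediction, targets, final_layer):
    try:
        return loss_expression(prediction, targets)
    except TypeError:
        # the loss expression also wants the final layer
        return loss_expression(prediction, targets, final_layer)


if __name__ == "__main__":
    def plain_loss(prediction, targets):
        return (prediction - targets) ** 2

    def regularized_loss(prediction, targets, final_layer):
        # toy stand-in for a loss with a penalty derived from the network
        return (prediction - targets) ** 2 + 0.5 * final_layer

    assert _dispatch_loss(plain_loss, 1.0, 0.5, final_layer=2.0) == 0.25
    assert _dispatch_loss(regularized_loss, 1.0, 0.5, final_layer=2.0) == 1.25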