def test_preprocessed_splitter():
    class DemeanPreproc():
        """Just for tests :)"""
        def apply(self, dataset, can_fit=False):
            topo_view = dataset.get_topological_view()
            if can_fit:
                self.mean = np.mean(topo_view)
            dataset.set_topological_view(topo_view - self.mean)

    data = np.arange(10)
    dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data),
                                       y=np.zeros(10))
    splitter = SingleFoldSplitter(n_folds=10, i_test_fold=9)
    preproc_splitter = PreprocessedSplitter(dataset_splitter=splitter,
                                            preprocessor=DemeanPreproc())

    first_round_sets = preproc_splitter.get_train_valid_test(dataset)

    train_topo = first_round_sets['train'].get_topological_view()
    valid_topo = first_round_sets['valid'].get_topological_view()
    test_topo = first_round_sets['test'].get_topological_view()
    assert np.array_equal(
        train_topo, to_4d_array([-3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5]))
    assert np.array_equal(valid_topo, to_4d_array([4.5]))
    assert np.array_equal(test_topo, to_4d_array([5.5]))

    second_round_set = preproc_splitter.get_train_merged_valid_test(dataset)

    train_topo = second_round_set['train'].get_topological_view()
    valid_topo = second_round_set['valid'].get_topological_view()
    test_topo = second_round_set['test'].get_topological_view()
    assert np.array_equal(train_topo,
                          to_4d_array([-4, -3, -2, -1, 0, 1, 2, 3, 4]))
    assert np.array_equal(valid_topo, to_4d_array([4]))
    assert np.array_equal(test_topo, to_4d_array([5]))
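The expected values in the asserts above follow directly from fitting the mean on the training fold only, then refitting it on the merged train+valid set. A minimal standalone sketch of that arithmetic in plain NumPy (independent of the wrapper and splitter classes used in the test):

import numpy as np

data = np.arange(10, dtype=float)
train, valid, test = data[:8], data[8:9], data[9:]

# first round: mean fitted on the training fold only
train_mean = train.mean()        # 3.5
print(train - train_mean)        # [-3.5 -2.5 -1.5 -0.5  0.5  1.5  2.5  3.5]
print(valid - train_mean)        # [4.5]
print(test - train_mean)         # [5.5]

# second round: train and valid are merged, so the mean is refitted on [0..8]
merged = np.concatenate([train, valid])
merged_mean = merged.mean()      # 4.0
print(merged - merged_mean)      # [-4. -3. -2. -1.  0.  1.  2.  3.  4.]
print(valid - merged_mean)       # [4.]
print(test - merged_mean)        # [5.]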
Example #3
class Experiment(object):
    def __init__(self, final_layer, dataset, splitter, preprocessor,
            iterator, loss_expression, updates_expression, updates_modifier,
            monitors, stop_criterion, remember_best_chan, run_after_early_stop,
            batch_modifier=None):
        self.final_layer = final_layer
        self.dataset = dataset
        self.dataset_provider = PreprocessedSplitter(splitter, preprocessor)
        self.preprocessor = preprocessor
        self.iterator = iterator
        self.loss_expression = loss_expression
        self.updates_expression = updates_expression
        self.updates_modifier = updates_modifier
        self.monitors = monitors
        self.stop_criterion = stop_criterion
        self.monitor_manager = MonitorManager(monitors)
        self.remember_extension = RememberBest(remember_best_chan)
        self.run_after_early_stop = run_after_early_stop
        self.batch_modifier = batch_modifier
    
    def setup(self, target_var=None):
        lasagne.random.set_rng(RandomState(9859295))
        self.dataset.ensure_is_loaded()
        self.print_layer_sizes()
        log.info("Create theano functions...")
        self.create_theano_functions(target_var)
        # reset remember best extension in case you rerun some experiment
        self.remember_extension = RememberBest(
            self.remember_extension.chan_name)
        log.info("Done.")

    def print_layer_sizes(self):
        log.info("Layers...")
        # start on newline so everything starts from left end of terminal, 
        # including input layer string
        log.info('\n' + layers_to_str(self.final_layer))
    
    def create_theano_functions(self, target_var, deterministic_training=False):
        if target_var is None:
            if hasattr(self.dataset, 'get_dummy_y'):
                log.info("Use dataset-supplied dummy y to determine "
                    "shape and type of target variable")
                dummy_y = self.dataset.get_dummy_y()
                # tensor with as many dimensions as y
                target_type = T.TensorType(
                    dtype=dummy_y.dtype,
                    broadcastable=[False]*len(dummy_y.shape))
                target_var = target_type()
            else:
                log.info("Automatically determine size of target variable by example...")
                # get a dummy batch to determine the target size;
                # use the test set since it is smaller, so memory
                # may be freed more quickly
                
                # prevent reloading at this step?
                was_reloadable = self.dataset.reloadable
                self.dataset.reloadable = False
                test_set = self.dataset_provider.get_train_valid_test(self.dataset)['test']
                self.dataset.reloadable = was_reloadable
                batches = self.iterator.get_batches(test_set, shuffle=False)
                dummy_batch = next(batches)
                dummy_y = dummy_batch[1]
                del test_set
                # tensor with as many dimensions as y
                target_type = T.TensorType(
                    dtype=dummy_y.dtype,
                    broadcastable=[False]*len(dummy_y.shape))
                target_var = target_type()
                self.dataset.ensure_is_loaded()
        
        prediction = lasagne.layers.get_output(self.final_layer,
            deterministic=deterministic_training)
        
        # "test" as in "during testing", not as in "test set"
        test_prediction = lasagne.layers.get_output(self.final_layer, 
            deterministic=True)
        # The loss expression may or may not take the final layer as an extra argument
        try:
            loss = self.loss_expression(prediction, target_var).mean()
            test_loss = self.loss_expression(test_prediction, target_var).mean()
        except TypeError:
            loss = self.loss_expression(prediction, target_var, self.final_layer).mean()
            test_loss = self.loss_expression(test_prediction, target_var, self.final_layer).mean()
            
        # create parameter update expressions
        params = lasagne.layers.get_all_params(self.final_layer, trainable=True)
        updates = self.updates_expression(loss, params)
        if self.updates_modifier is not None:
            # put norm constraints on all layers; for now fixed to
            # max kernel norm 2 and max col norm 0.5
            updates = self.updates_modifier.modify(updates, self.final_layer)
        input_var = lasagne.layers.get_all_layers(self.final_layer)[0].input_var
        # Store all parameters, including update params such as the adam params,
        # needed for resetting to the best model after early stopping.
        # Not sure why I am not only using the update params below;
        # possibly because batch norm params are not in the update params?
        all_layer_params = lasagne.layers.get_all_params(self.final_layer)
        self.all_params = all_layer_params
        # now params from adam would still be missing... add them ...
        all_update_params = updates.keys()
        for param in all_update_params:
            if param not in self.all_params:
                self.all_params.append(param)

        self.train_func = theano.function([input_var, target_var], updates=updates)
        self.monitor_manager.create_theano_functions(input_var, target_var,
            test_prediction, test_loss)
        
    def run(self):
        log.info("Run until first stop...")
        self.run_until_early_stop()
        # always set up for the second stop in order to restore the best model,
        # even if not running after early stop...
        log.info("Setup for second stop...")
        self.setup_after_stop_training()
        if self.run_after_early_stop:
            log.info("Run until second stop...")
            self.run_until_second_stop()
            self.readd_old_monitor_chans()

    def run_until_early_stop(self):
        log.info("Split/Preprocess datasets...")
        datasets = self.dataset_provider.get_train_valid_test(self.dataset)
        log.info("...Done")
        self.create_monitors(datasets)
        self.run_until_stop(datasets, remember_best=True)
        return datasets

    def run_until_stop(self, datasets, remember_best):
        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                self.all_params)

        self.iterator.reset_rng()
        while not self.stop_criterion.should_stop(self.monitor_chans):
            self.run_one_epoch(datasets, remember_best)

    def run_one_epoch(self, datasets, remember_best):
        batch_generator = self.iterator.get_batches(datasets['train'],
            shuffle=True)
        with log_timing(log, None, final_msg='Time updates following epoch:'):
            for inputs, targets in batch_generator:
                if self.batch_modifier is not None:
                    inputs, targets = self.batch_modifier.process(inputs,
                        targets)
                # could happen that batch modifier has removed all inputs...
                if len(inputs) > 0:
                    self.train_func(inputs, targets)
        
        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                self.all_params)

    def setup_after_stop_training(self):
        # also remember the old monitor chans; they will be put back into
        # the monitor chans after the experiment has finished
        self.old_monitor_chans = deepcopy(self.monitor_chans)
        self.remember_extension.reset_to_best_model(self.monitor_chans,
                self.all_params)
        loss_to_reach = self.monitor_chans['train_loss'][-1]
        self.stop_criterion = Or(stop_criteria=[
            MaxEpochs(num_epochs=self.remember_extension.best_epoch * 2),
            ChanBelow(chan_name='valid_loss', target_value=loss_to_reach)])
        log.info("Train loss to reach {:.5f}".format(loss_to_reach))

    def run_until_second_stop(self):
        datasets = self.dataset_provider.get_train_merged_valid_test(
            self.dataset)
        self.run_until_stop(datasets, remember_best=False)

    def create_monitors(self, datasets):
        self.monitor_chans = OrderedDict()
        self.last_epoch_time = None
        for monitor in self.monitors:
            monitor.setup(self.monitor_chans, datasets)
            
    def monitor_epoch(self, all_datasets):
        self.monitor_manager.monitor_epoch(self.monitor_chans, all_datasets, 
            self.iterator)

    def print_epoch(self):
        # -1 because one monitoring pass is done before training starts
        i_epoch = len(list(self.monitor_chans.values())[0]) - 1
        log.info("Epoch {:d}".format(i_epoch))
        for chan_name in self.monitor_chans:
            log.info("{:25s} {:.5f}".format(chan_name,
                self.monitor_chans[chan_name][-1]))
        log.info("")
    
    def readd_old_monitor_chans(self):
        for key in self.old_monitor_chans:
            new_key = 'before_reset_' + key
            self.monitor_chans[new_key] = self.old_monitor_chans[key]
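As a rough sketch of the second-stop rule that setup_after_stop_training assembles from Or, MaxEpochs and ChanBelow: after resetting to the best model, training on the merged train+valid set continues until either the valid loss drops below the train loss recorded at the best epoch, or twice the best epoch count has elapsed. The function below is a hypothetical standalone rewrite for illustration, not part of the Experiment API:

def should_stop_second_phase(monitor_chans, best_epoch, loss_to_reach):
    # hypothetical re-implementation of Or(MaxEpochs(2 * best_epoch),
    # ChanBelow('valid_loss', loss_to_reach)), for illustration only
    n_epochs = len(monitor_chans['valid_loss']) - 1  # -1: one monitor pass before training
    return (n_epochs >= 2 * best_epoch or
            monitor_chans['valid_loss'][-1] < loss_to_reach)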