def test_first_fold(): data = np.arange(10) dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data), y=np.zeros(10)) splitter = SingleFoldSplitter(n_folds=10, i_test_fold=0) datasets= splitter.split_into_train_valid_test(dataset) assert np.array_equal(to_4d_array(np.arange(1,9)), datasets['train'].get_topological_view() ) assert np.array_equal(to_4d_array([9]), datasets['valid'].get_topological_view() ) assert np.array_equal(to_4d_array([0]), datasets['test'].get_topological_view() )
def test_first_fold(): data = np.arange(10) dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data), y=np.zeros(10)) splitter = SingleFoldSplitter(n_folds=10, i_test_fold=0) datasets = splitter.split_into_train_valid_test(dataset) assert np.array_equal(to_4d_array(np.arange(1, 9)), datasets['train'].get_topological_view()) assert np.array_equal(to_4d_array([9]), datasets['valid'].get_topological_view()) assert np.array_equal(to_4d_array([0]), datasets['test'].get_topological_view())
def test_repeated_calls_with_shuffle(): """Repeated calls should always lead to same split""" data = np.arange(100) dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data), y=np.zeros(100)) splitter = SingleFoldSplitter(n_folds=10, i_test_fold=9, shuffle=True) reference_datasets = splitter.split_into_train_valid_test(dataset) # 20 attemptsat splitting should all lead to same datasets! for _ in range(20): new_datasets = splitter.split_into_train_valid_test(dataset) for key in reference_datasets: assert np.array_equal(reference_datasets[key].get_topological_view(), new_datasets[key].get_topological_view())
def test_repeated_calls_with_shuffle(): """Repeated calls should always lead to same split""" data = np.arange(100) dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data), y=np.zeros(100)) splitter = SingleFoldSplitter(n_folds=10, i_test_fold=9, shuffle=True) reference_datasets = splitter.split_into_train_valid_test(dataset) # 20 attemptsat splitting should all lead to same datasets! for _ in range(20): new_datasets = splitter.split_into_train_valid_test(dataset) for key in reference_datasets: assert np.array_equal( reference_datasets[key].get_topological_view(), new_datasets[key].get_topological_view())
def test_preprocessed_splitter(): class DemeanPreproc(): """Just for tests :)""" def apply(self, dataset, can_fit=False): topo_view = dataset.get_topological_view() if can_fit: self.mean = np.mean(topo_view) dataset.set_topological_view(topo_view - self.mean) data = np.arange(10) dataset = DenseDesignMatrixWrapper(topo_view=to_4d_array(data), y=np.zeros(10)) splitter = SingleFoldSplitter(n_folds=10, i_test_fold=9) preproc_splitter = PreprocessedSplitter(dataset_splitter=splitter, preprocessor=DemeanPreproc()) first_round_sets = preproc_splitter.get_train_valid_test(dataset) train_topo = first_round_sets['train'].get_topological_view() valid_topo = first_round_sets['valid'].get_topological_view() test_topo = first_round_sets['test'].get_topological_view() assert np.array_equal( train_topo, to_4d_array([-3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5])) assert np.array_equal(valid_topo, to_4d_array([4.5])) assert np.array_equal(test_topo, to_4d_array([5.5])) second_round_set = preproc_splitter.get_train_merged_valid_test(dataset) train_topo = second_round_set['train'].get_topological_view() valid_topo = second_round_set['valid'].get_topological_view() test_topo = second_round_set['test'].get_topological_view() assert np.array_equal(train_topo, to_4d_array([-4, -3, -2, -1, 0, 1, 2, 3, 4])) assert np.array_equal(valid_topo, to_4d_array([4])) assert np.array_equal(test_topo, to_4d_array([5]))
def run(self): self.all_layers = [] self.all_monitor_chans = [] for i_fold in range(self.n_folds): log.info("Running fold {:d} of {:d}".format( i_fold + 1, self.n_folds)) this_layers = deepcopy(self.final_layer) this_exp_args = deepcopy(self.exp_args) ## make sure dataset is loaded... self.dataset.ensure_is_loaded() dataset_splitter = SingleFoldSplitter(n_folds=self.n_folds, i_test_fold=i_fold, shuffle=self.shuffle) exp = Experiment(this_layers, self.dataset, dataset_splitter, **this_exp_args) exp.setup() exp.run() self.all_layers.append(deepcopy(exp.final_layer)) self.all_monitor_chans.append(deepcopy(exp.monitor_chans))